diff --git a/.gitignore b/.gitignore index 553a077d031a3..06a64184eaa53 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,12 @@ dist *classes target/ +build/ +.gradle/ lib_managed/ src_managed/ project/boot/ project/plugins/project/ -project/sbt_project_definition.iml .idea .svn .classpath @@ -14,3 +15,14 @@ project/sbt_project_definition.iml .#* rat.out TAGS +*.iml +.project +.settings +.gradle +kafka.ipr +kafka.iws +.vagrant +Vagrantfile.local + +config/server-* +config/zookeeper-* diff --git a/LICENSE b/LICENSE index cb1800b0c39af..d645695673349 100644 --- a/LICENSE +++ b/LICENSE @@ -200,34 +200,3 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. - ------------------------------------------------------------------------ - -SBT LICENSE - -Copyright (c) 2008, 2009, 2010 Mark Harrah, Jason Zaugg -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. -3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ------------------------------------------------------------------------ \ No newline at end of file diff --git a/README-sbt.md b/README-sbt.md deleted file mode 100644 index 10b8d2523605e..0000000000000 --- a/README-sbt.md +++ /dev/null @@ -1,60 +0,0 @@ -# Apache Kafka # - -See our [web site](http://kafka.apache.org) for details on the project. - -## Building it ## -1. ./sbt update -2. ./sbt package -3. ./sbt assembly-package-dependency - -To build for a particular version of Scala (either 2.8.0, 2.8.2, 2.9.1, 2.9.2 or 2.10.1), change step 2 above to: -2. ./sbt "++2.8.0 package" - -To build for all supported versions of Scala, change step 2 above to: -2. ./sbt +package - -## Running it ## -Follow instuctions in http://kafka.apache.org/documentation.html#quickstart - -## Running unit tests ## - ./sbt test - -## Building a binary release zip or gzipped tar ball ## - ./sbt release-zip - ./sbt release-tar -The release file can be found inside ./target/RELEASE/. - -## Other Build Tips ## -Here are some useful sbt commands, to be executed at the sbt command prompt (./sbt). 
Prefixing with "++ " runs the -command for a specific Scala version, prefixing with "+" will perform the action for all versions of Scala, and no prefix -runs the command for the default (2.8.0) version of Scala. - - -tasks : Lists all the sbt commands and their descriptions -clean : Deletes all generated files (the target directory). -compile : Compile all the sub projects, but not create the jars -test : Run all unit tests in all sub projects -release-zip : Create all the jars, run unit tests and create a deployable release zip -release-tar : Create all the jars, run unit tests and create a deployable release gzipped tar tall -package: Creates jars for src, test, docs etc -projects : List all the sub projects -project sub_project_name : Switch to a particular sub-project. For example, to switch to the core kafka code, use "project core-kafka" - -The following commands can be run only on a particular sub project - -test-only package.test.TestName : Runs only the specified test in the current sub project -run : Provides options to run any of the classes that have a main method. For example, you can switch to project java-examples, and run the examples there by executing "project java-examples" followed by "run" - -For more details please see the [SBT documentation](https://github.com/harrah/xsbt/wiki) - -## Contribution ## - -Kafka is a new project, and we are interested in building the community; we would welcome any thoughts or [patches](https://issues.apache.org/jira/browse/KAFKA). You can reach us [on the Apache mailing lists](http://kafka.apache.org/contact.html). - -To contribute follow the instructions here: - * http://kafka.apache.org/contributing.html - -We also welcome patches for the website and documentation which can be found here: - * https://svn.apache.org/repos/asf/kafka/site - - - - diff --git a/README.md b/README.md index 9b272b52c8b65..11dfdf9379ad1 100644 --- a/README.md +++ b/README.md @@ -1,81 +1,106 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Apache Kafka # +Apache Kafka +================= See our [web site](http://kafka.apache.org) for details on the project. -## Building a jar and running it ## -1. ./gradlew copyDependantLibs -2. ./gradlew jar -3. Follow instuctions in http://kafka.apache.org/documentation.html#quickstart +You need to have [gradle](http://www.gradle.org/installation) installed. 
-## Running unit tests ## -./gradlew test +### First bootstrap and download the wrapper ### + cd kafka_source_dir + gradle -## Forcing re-running unit tests w/o code change ## -./gradlew cleanTest test +Now everything else will work -## Running a particular unit test ## -./gradlew -Dtest.single=RequestResponseSerializationTest core:test +### Building a jar and running it ### + ./gradlew jar + +Follow instructions in http://kafka.apache.org/documentation.html#quickstart + +### Building source jar ### + ./gradlew srcJar + +### Building javadocs and scaladocs ### + ./gradlew javadoc + ./gradlew javadocJar # builds a jar from the javadocs + ./gradlew scaladoc + ./gradlew scaladocJar # builds a jar from the scaladocs + ./gradlew docsJar # builds both javadoc and scaladoc jar + +### Running unit tests ### + ./gradlew test + +### Forcing re-running unit tests w/o code change ### + ./gradlew cleanTest test + +### Running a particular unit test ### + ./gradlew -Dtest.single=RequestResponseSerializationTest core:test + +### Running a particular unit test with log4j output ### + change the log4j setting in either clients/src/test/resources/log4j.properties or core/src/test/resources/log4j.properties + ./gradlew -i -Dtest.single=RequestResponseSerializationTest core:test + +### Building a binary release gzipped tar ball ### + ./gradlew clean + ./gradlew releaseTarGz + The above command will fail if you haven't set up the signing key. To bypass signing the artifact, you can run + ./gradlew releaseTarGz -x signArchives -## Building a binary release gzipped tar ball ## -./gradlew clean -./gradlew releaseTarGz The release file can be found inside ./core/build/distributions/. -## Cleaning the build ## -./gradlew clean +### Cleaning the build ### + ./gradlew clean + +### Running a task on a particular version of Scala (either 2.9.1, 2.9.2, 2.10.1 or 2.11) ### +#### (If building a jar with a version other than 2.10, need to set SCALA_BINARY_VERSION variable or change it in bin/kafka-run-class.sh to run quick start.) #### + ./gradlew -PscalaVersion=2.9.1 jar + ./gradlew -PscalaVersion=2.9.1 test + ./gradlew -PscalaVersion=2.9.1 releaseTarGz + +### Running a task for a specific project ### +This is for 'core', 'contrib:hadoop-consumer', 'contrib:hadoop-producer', 'examples' and 'clients' + ./gradlew core:jar + ./gradlew core:test + +### Listing all gradle tasks ### + ./gradlew tasks + +### Building IDE project ### + ./gradlew eclipse + ./gradlew idea -## Running a task on a particular version of Scala (either 2.8.0, 2.8.2, 2.9.1, 2.9.2 or 2.10.1) ## -## (If building a jar with a version other than 2.8.0, the scala version variable in bin/kafka-run-class.sh needs to be changed to run quick start.) 
## -./gradlew -PscalaVersion=2.9.1 jar -./gradlew -PscalaVersion=2.9.1 test -./gradlew -PscalaVersion=2.9.1 releaseTarGz +### Building the jar for all scala versions and for all projects ### + ./gradlew jarAll -## Running a task for a specific project in 'core', 'perf', 'contrib:hadoop-consumer', 'contrib:hadoop-producer', 'examples', 'clients' ## -./gradlew core:jar -./gradlew core:test +### Running unit tests for all scala versions and for all projects ### + ./gradlew testAll -## Listing all gradle tasks ## -./gradlew tasks +### Building a binary release gzipped tar ball for all scala versions ### + ./gradlew releaseTarGzAll -# Building IDE project ## -./gradlew eclipse -./gradlew idea +### Publishing the jar for all versions of Scala and for all projects to maven ### + ./gradlew uploadArchivesAll -# Building the jar for all scala versions and for all projects ## -./gradlew jarAll +Please note for this to work you should create/update `~/.gradle/gradle.properties` and assign the following variables -## Running unit tests for all scala versions and for all projects ## -./gradlew testAll + mavenUrl= + mavenUsername= + mavenPassword= + signing.keyId= + signing.password= + signing.secretKeyRingFile= -## Building a binary release gzipped tar ball for all scala versions ## -./gradlew releaseTarGzAll +### Building the test jar ### + ./gradlew testJar -## Publishing the jar for all version of Scala and for all projects to maven (To test locally, change mavenUrl in gradle.properties to a local dir.) ## -./gradlew uploadArchivesAll +### Determining how transitive dependencies are added ### + ./gradlew core:dependencies --configuration runtime -## Building the test jar ## -./gradlew testJar +### Running in Vagrant ### -## Determining how transitive dependencies are added ## -./gradlew core:dependencies --configuration runtime +See [vagrant/README.md](vagrant/README.md). -## Contribution ## +### Contribution ### -Kafka is a new project, and we are interested in building the community; we would welcome any thoughts or [patches](https://issues.apache.org/jira/browse/KAFKA). You can reach us [on the Apache mailing lists](http://kafka.apache.org/contact.html). +Apache Kafka is interested in building the community; we would welcome any thoughts or [patches](https://issues.apache.org/jira/browse/KAFKA). You can reach us [on the Apache mailing lists](http://kafka.apache.org/contact.html). To contribute follow the instructions here: * http://kafka.apache.org/contributing.html diff --git a/Vagrantfile b/Vagrantfile new file mode 100644 index 0000000000000..55c67ddda4581 --- /dev/null +++ b/Vagrantfile @@ -0,0 +1,168 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -*- mode: ruby -*- +# vi: set ft=ruby : + +require 'socket' + +# Vagrantfile API/syntax version. 
Don't touch unless you know what you're doing! +VAGRANTFILE_API_VERSION = "2" + +# General config +enable_dns = false +num_zookeepers = 1 +num_brokers = 3 +num_workers = 0 # Generic workers that get the code, but don't start any services +ram_megabytes = 1280 + +# EC2 +ec2_access_key = ENV['AWS_ACCESS_KEY'] +ec2_secret_key = ENV['AWS_SECRET_KEY'] +ec2_keypair_name = nil +ec2_keypair_file = nil + +ec2_region = "us-east-1" +ec2_az = nil # Uses set by AWS +ec2_ami = "ami-9eaa1cf6" +ec2_instance_type = "m3.medium" +ec2_user = "ubuntu" +ec2_security_groups = nil +ec2_subnet_id = nil +# Only override this by setting it to false if you're running in a VPC and you +# are running Vagrant from within that VPC as well. +ec2_associate_public_ip = nil + +local_config_file = File.join(File.dirname(__FILE__), "Vagrantfile.local") +if File.exists?(local_config_file) then + eval(File.read(local_config_file), binding, "Vagrantfile.local") +end + +# TODO(ksweeney): RAM requirements are not empirical and can probably be significantly lowered. +Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| + config.hostmanager.enabled = true + config.hostmanager.manage_host = enable_dns + config.hostmanager.include_offline = false + + ## Provider-specific global configs + config.vm.provider :virtualbox do |vb,override| + override.vm.box = "ubuntu/trusty64" + + override.hostmanager.ignore_private_ip = false + + # Brokers started with the standard script currently set Xms and Xmx to 1G, + # plus we need some extra head room. + vb.customize ["modifyvm", :id, "--memory", ram_megabytes.to_s] + + if Vagrant.has_plugin?("vagrant-cachier") + config.cache.scope = :box + # Besides the defaults, we use a custom cache to handle the Oracle JDK + # download, which downloads via wget during an apt install. Because of the + # way the installer ends up using its cache directory, we need to jump + # through some hoops instead of just specifying a cache directly -- we + # share to a temporary location and the provisioning scripts symlink data + # to the right location. + config.cache.enable :generic, { + "oracle-jdk7" => { cache_dir: "/tmp/oracle-jdk7-installer-cache" }, + } + end + end + + config.vm.provider :aws do |aws,override| + # The "box" is specified as an AMI + override.vm.box = "dummy" + override.vm.box_url = "https://github.com/mitchellh/vagrant-aws/raw/master/dummy.box" + + override.hostmanager.ignore_private_ip = true + + override.ssh.username = ec2_user + override.ssh.private_key_path = ec2_keypair_file + + aws.access_key_id = ec2_access_key + aws.secret_access_key = ec2_secret_key + aws.keypair_name = ec2_keypair_name + + aws.region = ec2_region + aws.availability_zone = ec2_az + aws.instance_type = ec2_instance_type + aws.ami = ec2_ami + aws.security_groups = ec2_security_groups + aws.subnet_id = ec2_subnet_id + # If a subnet is specified, default to turning on a public IP unless the + # user explicitly specifies the option. Without a public IP, Vagrant won't + # be able to SSH into the hosts unless Vagrant is also running in the VPC. + if ec2_associate_public_ip.nil? + aws.associate_public_ip = true unless ec2_subnet_id.nil? 
+ else + aws.associate_public_ip = ec2_associate_public_ip + end + + # Exclude some directories that can grow very large from syncing + config.vm.synced_folder ".", "/vagrant", type: "rsync", :rsync_excludes => ['.git', 'core/data/', 'logs/', 'system_test/'] + end + + def name_node(node, name) + node.vm.hostname = name + node.vm.provider :aws do |aws| + aws.tags = { 'Name' => "kafka-vagrant-" + Socket.gethostname + "-" + name } + end + end + + def assign_local_ip(node, ip_address) + node.vm.provider :virtualbox do |vb,override| + override.vm.network :private_network, ip: ip_address + end + end + + ## Cluster definition + zookeepers = [] + (1..num_zookeepers).each { |i| + name = "zk" + i.to_s + zookeepers.push(name) + config.vm.define name do |zookeeper| + name_node(zookeeper, name) + ip_address = "192.168.50." + (10 + i).to_s + assign_local_ip(zookeeper, ip_address) + zookeeper.vm.provision "shell", path: "vagrant/base.sh" + zookeeper.vm.provision "shell", path: "vagrant/zk.sh", :args => [i.to_s, num_zookeepers] + end + } + + (1..num_brokers).each { |i| + name = "broker" + i.to_s + config.vm.define name do |broker| + name_node(broker, name) + ip_address = "192.168.50." + (50 + i).to_s + assign_local_ip(broker, ip_address) + # We need to be careful about what we list as the publicly routable + # address since this is registered in ZK and handed out to clients. If + # host DNS isn't setup, we shouldn't use hostnames -- IP addresses must be + # used to support clients running on the host. + zookeeper_connect = zookeepers.map{ |zk_addr| zk_addr + ":2181"}.join(",") + broker.vm.provision "shell", path: "vagrant/base.sh" + broker.vm.provision "shell", path: "vagrant/broker.sh", :args => [i.to_s, enable_dns ? name : ip_address, zookeeper_connect] + end + } + + (1..num_workers).each { |i| + name = "worker" + i.to_s + config.vm.define name do |worker| + name_node(worker, name) + ip_address = "192.168.50." + (100 + i).to_s + assign_local_ip(worker, ip_address) + worker.vm.provision "shell", path: "vagrant/base.sh" + end + } + +end diff --git a/bin/kafka-console-consumer.sh b/bin/kafka-console-consumer.sh index b86ea56c5c01f..07c90a9bebfed 100755 --- a/bin/kafka-console-consumer.sh +++ b/bin/kafka-console-consumer.sh @@ -5,14 +5,17 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -export KAFKA_HEAP_OPTS="-Xmx512M" -exec $(dirname $0)/kafka-run-class.sh kafka.consumer.ConsoleConsumer $@ +if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then + export KAFKA_HEAP_OPTS="-Xmx512M" +fi + +exec $(dirname $0)/kafka-run-class.sh kafka.tools.ConsoleConsumer $@ diff --git a/bin/kafka-console-producer.sh b/bin/kafka-console-producer.sh index ec0f21f9a2e64..ccca66de44384 100755 --- a/bin/kafka-console-producer.sh +++ b/bin/kafka-console-producer.sh @@ -5,14 +5,16 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. 
You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -export KAFKA_HEAP_OPTS="-Xmx512M" -exec $(dirname $0)/kafka-run-class.sh kafka.producer.ConsoleProducer $@ +if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then + export KAFKA_HEAP_OPTS="-Xmx512M" +fi +exec $(dirname $0)/kafka-run-class.sh kafka.tools.ConsoleProducer $@ diff --git a/bin/kafka-simple-consumer-perf-test.sh b/bin/kafka-consumer-offset-checker.sh similarity index 87% rename from bin/kafka-simple-consumer-perf-test.sh rename to bin/kafka-consumer-offset-checker.sh index c466cc8886a25..c275f7ef6975c 100755 --- a/bin/kafka-simple-consumer-perf-test.sh +++ b/bin/kafka-consumer-offset-checker.sh @@ -14,5 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -export KAFKA_HEAP_OPTS="-Xmx512M" -exec $(dirname $0)/kafka-run-class.sh kafka.perf.SimpleConsumerPerformance $@ +exec $(dirname $0)/kafka-run-class.sh kafka.tools.ConsumerOffsetChecker $@ diff --git a/bin/kafka-consumer-perf-test.sh b/bin/kafka-consumer-perf-test.sh index 38c56c78304c2..ebc513aa7331e 100755 --- a/bin/kafka-consumer-perf-test.sh +++ b/bin/kafka-consumer-perf-test.sh @@ -5,14 +5,16 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -export KAFKA_HEAP_OPTS="-Xmx512M" -exec $(dirname $0)/kafka-run-class.sh kafka.perf.ConsumerPerformance $@ +if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then + export KAFKA_HEAP_OPTS="-Xmx512M" +fi +exec $(dirname $0)/kafka-run-class.sh kafka.tools.ConsumerPerformance $@ diff --git a/project/build.properties b/bin/kafka-mirror-maker.sh old mode 100644 new mode 100755 similarity index 90% rename from project/build.properties rename to bin/kafka-mirror-maker.sh index eea46f3ca0a07..56e342cd65788 --- a/project/build.properties +++ b/bin/kafka-mirror-maker.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -12,6 +13,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-#Project properties -#Mon Feb 28 11:55:49 PST 2011 -sbt.version=0.12.1 + +exec $(dirname $0)/kafka-run-class.sh kafka.tools.MirrorMaker $@ diff --git a/bin/kafka-producer-perf-test.sh b/bin/kafka-producer-perf-test.sh index d75ab7d1f303e..84ac9497c5cbf 100755 --- a/bin/kafka-producer-perf-test.sh +++ b/bin/kafka-producer-perf-test.sh @@ -5,14 +5,16 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -export KAFKA_HEAP_OPTS="-Xmx512M" -exec $(dirname $0)/kafka-run-class.sh kafka.perf.ProducerPerformance $@ +if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then + export KAFKA_HEAP_OPTS="-Xmx512M" +fi +exec $(dirname $0)/kafka-run-class.sh kafka.tools.ProducerPerformance $@ diff --git a/sbt b/bin/kafka-replica-verification.sh similarity index 87% rename from sbt rename to bin/kafka-replica-verification.sh index 944ebf8c383ec..ee6d19e7ffa49 100755 --- a/sbt +++ b/bin/kafka-replica-verification.sh @@ -1,16 +1,17 @@ +#!/bin/bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -java -Xmx1024M -XX:MaxPermSize=512m -Dbuild.number="$BUILD_NUMBER" -jar `dirname $0`/lib/sbt-launch.jar "$@" +exec $(dirname $0)/kafka-run-class.sh kafka.tools.ReplicaVerificationTool $@ diff --git a/bin/kafka-run-class.sh b/bin/kafka-run-class.sh index 75a3fc42a2e41..ce3a4d06a27f6 100755 --- a/bin/kafka-run-class.sh +++ b/bin/kafka-run-class.sh @@ -5,9 +5,9 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,49 +23,44 @@ fi base_dir=$(dirname $0)/.. # create logs directory -LOG_DIR=$base_dir/logs -if [ ! -d $LOG_DIR ]; then - mkdir $LOG_DIR +if [ "x$LOG_DIR" = "x" ]; then + LOG_DIR="$base_dir/logs" +fi + +if [ ! 
-d "$LOG_DIR" ]; then + mkdir -p "$LOG_DIR" fi if [ -z "$SCALA_VERSION" ]; then - SCALA_VERSION=2.8.0 + SCALA_VERSION=2.10.4 fi -# TODO: remove when removing sbt -# assume all dependencies have been packaged into one jar with sbt-assembly's task "assembly-package-dependency" -for file in $base_dir/core/target/scala-${SCALA_VERSION}/*.jar; -do - CLASSPATH=$CLASSPATH:$file -done +if [ -z "$SCALA_BINARY_VERSION" ]; then + SCALA_BINARY_VERSION=2.10 +fi # run ./gradlew copyDependantLibs to get all dependant jars in a local dir -for file in $base_dir/core/build/dependant-libs-${SCALA_VERSION}/*.jar; -do - CLASSPATH=$CLASSPATH:$file -done - -for file in $base_dir/perf/build/libs//kafka-perf_${SCALA_VERSION}*.jar; +for file in $base_dir/core/build/dependant-libs-${SCALA_VERSION}*/*.jar; do CLASSPATH=$CLASSPATH:$file done -for file in $base_dir/clients/build/libs//kafka-clients*.jar; +for file in $base_dir/examples/build/libs//kafka-examples*.jar; do CLASSPATH=$CLASSPATH:$file done -for file in $base_dir/examples/build/libs//kafka-examples*.jar; +for file in $base_dir/contrib/hadoop-consumer/build/libs//kafka-hadoop-consumer*.jar; do CLASSPATH=$CLASSPATH:$file done -for file in $base_dir/contrib/hadoop-consumer/build/libs//kafka-hadoop-consumer*.jar; +for file in $base_dir/contrib/hadoop-producer/build/libs//kafka-hadoop-producer*.jar; do CLASSPATH=$CLASSPATH:$file done -for file in $base_dir/contrib/hadoop-producer/build/libs//kafka-hadoop-producer*.jar; +for file in $base_dir/clients/build/libs/kafka-clients*.jar; do CLASSPATH=$CLASSPATH:$file done @@ -76,7 +71,7 @@ do CLASSPATH=$CLASSPATH:$file done -for file in $base_dir/core/build/libs/kafka_${SCALA_VERSION}*.jar; +for file in $base_dir/core/build/libs/kafka_${SCALA_BINARY_VERSION}*.jar; do CLASSPATH=$CLASSPATH:$file done @@ -159,6 +154,3 @@ if [ "x$DAEMON_MODE" = "xtrue" ]; then else exec $JAVA $KAFKA_HEAP_OPTS $KAFKA_JVM_PERFORMANCE_OPTS $KAFKA_GC_LOG_OPTS $KAFKA_JMX_OPTS $KAFKA_LOG4J_OPTS -cp $CLASSPATH $KAFKA_OPTS "$@" fi - - - diff --git a/bin/kafka-server-start.sh b/bin/kafka-server-start.sh index 7050649e6795b..dc01d46a13f16 100755 --- a/bin/kafka-server-start.sh +++ b/bin/kafka-server-start.sh @@ -16,12 +16,18 @@ if [ $# -lt 1 ]; then - echo "USAGE: $0 [-daemon] server.properties" + echo "USAGE: $0 [-daemon] server.properties [--override property=value]*" exit 1 fi base_dir=$(dirname $0) -export KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:$base_dir/../config/log4j.properties" -export KAFKA_HEAP_OPTS="-Xmx1G -Xms1G" + +if [ "x$KAFKA_LOG4J_OPTS" = "x" ]; then + export KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:$base_dir/../config/log4j.properties" +fi + +if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then + export KAFKA_HEAP_OPTS="-Xmx1G -Xms1G" +fi EXTRA_ARGS="-name kafkaServer -loggc" diff --git a/bin/kafka-server-stop.sh b/bin/kafka-server-stop.sh index 35a26a6529a91..cd8160c41e7b0 100755 --- a/bin/kafka-server-stop.sh +++ b/bin/kafka-server-stop.sh @@ -13,4 +13,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-ps ax | grep -i 'kafka\.Kafka' | grep java | grep -v grep | awk '{print $1}' | xargs kill -SIGINT +ps ax | grep -i 'kafka\.Kafka' | grep java | grep -v grep | awk '{print $1}' | xargs kill -SIGTERM diff --git a/bin/windows/kafka-console-consumer.bat b/bin/windows/kafka-console-consumer.bat index a287b9e3056e4..f70f98a3899bb 100644 --- a/bin/windows/kafka-console-consumer.bat +++ b/bin/windows/kafka-console-consumer.bat @@ -15,6 +15,6 @@ rem See the License for the specific language governing permissions and rem limitations under the License. SetLocal -set KAFKA_OPTS=-Xmx512M -server -Dcom.sun.management.jmxremote -Dlog4j.configuration=file:"%CD%\kafka-console-consumer-log4j.properties" -kafka-run-class.bat kafka.consumer.ConsoleConsumer %* +set KAFKA_HEAP_OPTS=-Xmx512M +%~dp0kafka-run-class.bat kafka.tools.ConsoleConsumer %* EndLocal diff --git a/bin/windows/kafka-console-producer.bat b/bin/windows/kafka-console-producer.bat index b321ee28e8129..a5b57de7ba6c1 100644 --- a/bin/windows/kafka-console-producer.bat +++ b/bin/windows/kafka-console-producer.bat @@ -14,4 +14,7 @@ rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. rem See the License for the specific language governing permissions and rem limitations under the License. -kafka-run-class.bat kafka.producer.ConsoleProducer %* +SetLocal +set KAFKA_HEAP_OPTS=-Xmx512M +%~dp0kafka-run-class.bat kafka.tools.ConsoleProducer %* +EndLocal diff --git a/sbt.bat b/bin/windows/kafka-consumer-offset-checker.bat similarity index 91% rename from sbt.bat rename to bin/windows/kafka-consumer-offset-checker.bat index 9321ada9316ac..b6967c49609c3 100644 --- a/sbt.bat +++ b/bin/windows/kafka-consumer-offset-checker.bat @@ -1,17 +1,17 @@ -@echo off -rem Licensed to the Apache Software Foundation (ASF) under one or more -rem contributor license agreements. See the NOTICE file distributed with -rem this work for additional information regarding copyright ownership. -rem The ASF licenses this file to You under the Apache License, Version 2.0 -rem (the "License"); you may not use this file except in compliance with -rem the License. You may obtain a copy of the License at -rem -rem http://www.apache.org/licenses/LICENSE-2.0 -rem -rem Unless required by applicable law or agreed to in writing, software -rem distributed under the License is distributed on an "AS IS" BASIS, -rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -rem See the License for the specific language governing permissions and -rem limitations under the License. - -java -Xmx1024M -XX:MaxPermSize=512m -jar lib\sbt-launch.jar "%1" \ No newline at end of file +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. 
+ +%~dp0kafka-run-class.bat kafka.tools.ConsumerOffsetChecker %* diff --git a/bin/windows/kafka-consumer-perf-test.bat b/bin/windows/kafka-consumer-perf-test.bat new file mode 100644 index 0000000000000..afc2259cfd372 --- /dev/null +++ b/bin/windows/kafka-consumer-perf-test.bat @@ -0,0 +1,20 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +SetLocal +set KAFKA_HEAP_OPTS=-Xmx512M -Xms512M +%~dp0kafka-run-class.bat kafka.tools.ConsumerPerformance %* +EndLocal diff --git a/bin/windows/kafka-mirror-maker.bat b/bin/windows/kafka-mirror-maker.bat new file mode 100644 index 0000000000000..819e7d8248163 --- /dev/null +++ b/bin/windows/kafka-mirror-maker.bat @@ -0,0 +1,17 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +%~dp0kafka-run-class.bat kafka.tools.MirrorMaker %* diff --git a/bin/windows/kafka-preferred-replica-election.bat b/bin/windows/kafka-preferred-replica-election.bat new file mode 100644 index 0000000000000..a9a5b7e1dbcac --- /dev/null +++ b/bin/windows/kafka-preferred-replica-election.bat @@ -0,0 +1,17 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. 
+ +%~dp0kafka-run-class.bat kafka.admin.PreferredReplicaLeaderElectionCommand %* diff --git a/bin/windows/kafka-producer-perf-test.bat b/bin/windows/kafka-producer-perf-test.bat new file mode 100644 index 0000000000000..a894752b0eadd --- /dev/null +++ b/bin/windows/kafka-producer-perf-test.bat @@ -0,0 +1,20 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +SetLocal +set KAFKA_HEAP_OPTS=-Xmx512M +%~dp0kafka-run-class.bat kafka.tools.ProducerPerformance %* +EndLocal diff --git a/bin/windows/kafka-reassign-partitions.bat b/bin/windows/kafka-reassign-partitions.bat new file mode 100644 index 0000000000000..0c13ee38d8df2 --- /dev/null +++ b/bin/windows/kafka-reassign-partitions.bat @@ -0,0 +1,17 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +%~dp0kafka-run-class.bat kafka.admin.ReassignPartitionsCommand %* diff --git a/bin/windows/kafka-replay-log-producer.bat b/bin/windows/kafka-replay-log-producer.bat new file mode 100644 index 0000000000000..2aec32620a68a --- /dev/null +++ b/bin/windows/kafka-replay-log-producer.bat @@ -0,0 +1,17 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. 
+ +%~dp0kafka-run-class.bat kafka.tools.ReplayLogProducer %* diff --git a/bin/windows/kafka-replica-verification.bat b/bin/windows/kafka-replica-verification.bat new file mode 100644 index 0000000000000..481db57fda022 --- /dev/null +++ b/bin/windows/kafka-replica-verification.bat @@ -0,0 +1,17 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +%~dp0kafka-run-class.bat kafka.tools.ReplicaVerificationTool %* diff --git a/bin/windows/kafka-run-class.bat b/bin/windows/kafka-run-class.bat index f274cd7d883ab..9df3d2b45236b 100644 --- a/bin/windows/kafka-run-class.bat +++ b/bin/windows/kafka-run-class.bat @@ -17,76 +17,117 @@ rem limitations under the License. setlocal enabledelayedexpansion IF [%1] EQU [] ( - echo "USAGE: $0 classname [opts]" - goto :eof + echo USAGE: %0 classname [opts] + EXIT /B 1 ) -set BASE_DIR=%CD%\.. +rem Using pushd popd to set BASE_DIR to the absolute path +pushd %~dp0..\.. +set BASE_DIR=%CD% +popd set CLASSPATH= -echo %BASE_DIR% -set ivyPath=%USERPROFILE%\.ivy2\cache - -set snappy=%ivyPath%/org.xerial.snappy/snappy-java/bundles/snappy-java-1.0.5.jar - call :concat %snappy% - -set library=%ivyPath%/org.scala-lang/scala-library/jars/scala-library-2.8.0.jar - call :concat %library% +IF ["%SCALA_VERSION%"] EQU [""] ( + set SCALA_VERSION=2.10.4 +) -set compiler=%ivyPath%/org.scala-lang/scala-compiler/jars/scala-compiler-2.8.0.jar - call :concat %compiler% +IF ["%SCALA_BINARY_VERSION%"] EQU [""] ( + set SCALA_BINARY_VERSION=2.10 +) -set log4j=%ivyPath%/log4j/log4j/jars/log4j-1.2.15.jar - call :concat %log4j% +rem Classpath addition for kafka-core dependencies +for %%i in (%BASE_DIR%\core\build\dependant-libs-%SCALA_VERSION%\*.jar) do ( + call :concat %%i +) -set slf=%ivyPath%/org.slf4j/slf4j-api/jars/slf4j-api-1.6.4.jar - call :concat %slf% +rem Classpath addition for kafka-perf dependencies +for %%i in (%BASE_DIR%\perf\build\dependant-libs-%SCALA_VERSION%\*.jar) do ( + call :concat %%i +) -set zookeeper=%ivyPath%/org.apache.zookeeper/zookeeper/jars/zookeeper-3.3.4.jar - call :concat %zookeeper% +rem Classpath addition for kafka-clients +for %%i in (%BASE_DIR%\clients\build\libs\kafka-clients-*.jar) do ( + call :concat %%i +) -set jopt=%ivyPath%/net.sf.jopt-simple/jopt-simple/jars/jopt-simple-3.2.jar - call :concat %jopt% +rem Classpath addition for kafka-examples +for %%i in (%BASE_DIR%\examples\build\libs\kafka-examples-*.jar) do ( + call :concat %%i +) -for %%i in (%BASE_DIR%\core\target\scala-2.8.0\*.jar) do ( +rem Classpath addition for contrib/hadoop-consumer +for %%i in (%BASE_DIR%\contrib\hadoop-consumer\build\libs\kafka-hadoop-consumer-*.jar) do ( call :concat %%i ) -for %%i in (%BASE_DIR%\core\lib\*.jar) do ( +rem Classpath addition for contrib/hadoop-producer +for %%i in 
(%BASE_DIR%\contrib\hadoop-producer\build\libs\kafka-hadoop-producer-*.jar) do ( call :concat %%i ) -for %%i in (%BASE_DIR%\perf\target\scala-2.8.0/kafka*.jar) do ( +rem Classpath addition for release +for %%i in (%BASE_DIR%\libs\*.jar) do ( call :concat %%i ) -IF ["%KAFKA_JMX_OPTS%"] EQU [""] ( - set KAFKA_JMX_OPTS=-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false +rem Classpath addition for core +for %%i in (%BASE_DIR%\core\build\libs\kafka_%SCALA_BINARY_VERSION%*.jar) do ( + call :concat %%i ) -IF ["%KAFKA_OPTS%"] EQU [""] ( - set KAFKA_OPTS=-Xmx512M -server -Dlog4j.configuration=file:"%BASE_DIR%\config\log4j.properties" +rem JMX settings +IF ["%KAFKA_JMX_OPTS%"] EQU [""] ( + set KAFKA_JMX_OPTS=-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false ) +rem JMX port to use IF ["%JMX_PORT%"] NEQ [""] ( set KAFKA_JMX_OPTS=%KAFKA_JMX_OPTS% -Dcom.sun.management.jmxremote.port=%JMX_PORT% ) +rem Log4j settings +IF ["%KAFKA_LOG4J_OPTS%"] EQU [""] ( + set KAFKA_LOG4J_OPTS=-Dlog4j.configuration=file:%BASE_DIR%/config/tools-log4j.properties +) + +rem Generic jvm settings you want to add +IF ["%KAFKA_OPTS%"] EQU [""] ( + set KAFKA_OPTS= +) + +rem Which java to use IF ["%JAVA_HOME%"] EQU [""] ( set JAVA=java ) ELSE ( set JAVA="%JAVA_HOME%/bin/java" ) -set SEARCHTEXT=\bin\.. -set REPLACETEXT= -set CLASSPATH=!CLASSPATH:%SEARCHTEXT%=%REPLACETEXT%! -set COMMAND= %JAVA% %KAFKA_OPTS% %KAFKA_JMX_OPTS% -cp %CLASSPATH% %* -set SEARCHTEXT=-cp ; -set REPLACETEXT=-cp -set COMMAND=!COMMAND:%SEARCHTEXT%=%REPLACETEXT%! +rem Memory options +IF ["%KAFKA_HEAP_OPTS%"] EQU [""] ( + set KAFKA_HEAP_OPTS=-Xmx256M +) + +rem JVM performance options +IF ["%KAFKA_JVM_PERFORMANCE_OPTS%"] EQU [""] ( + set KAFKA_JVM_PERFORMANCE_OPTS=-server -XX:+UseCompressedOops -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:+CMSScavengeBeforeRemark -XX:+DisableExplicitGC -Djava.awt.headless=true +) + +IF ["%CLASSPATH%"] EQU [""] ( + echo Classpath is empty. Please build the project first e.g. by running 'gradlew jarAll' + EXIT /B 2 +) + +set COMMAND=%JAVA% %KAFKA_HEAP_OPTS% %KAFKA_JVM_PERFORMANCE_OPTS% %KAFKA_JMX_OPTS% %KAFKA_LOG4J_OPTS% -cp %CLASSPATH% %KAFKA_OPTS% %* +rem echo. +rem echo %COMMAND% +rem echo. %COMMAND% +goto :eof :concat -set CLASSPATH=%CLASSPATH%;"%1" \ No newline at end of file +IF ["%CLASSPATH%"] EQU [""] ( + set CLASSPATH="%1" +) ELSE ( + set CLASSPATH=%CLASSPATH%;"%1" +) diff --git a/bin/windows/kafka-server-start.bat b/bin/windows/kafka-server-start.bat index 7db76dd44bab5..6a5aa9fb44b51 100644 --- a/bin/windows/kafka-server-start.bat +++ b/bin/windows/kafka-server-start.bat @@ -15,13 +15,12 @@ rem See the License for the specific language governing permissions and rem limitations under the License. 
IF [%1] EQU [] ( - echo "USAGE: $0 server.properties [consumer.properties producer.properties]" - goto :eof + echo USAGE: %0 server.properties + EXIT /B 1 ) -IF [%JMX_PORT%] EQU [] ( - echo Set JMX_PORT to default value : 9999 - set JMX_PORT=9999 -) - -kafka-run-class.bat kafka.Kafka %* \ No newline at end of file +SetLocal +set KAFKA_LOG4J_OPTS=-Dlog4j.configuration=file:%~dp0../../config/log4j.properties +set KAFKA_HEAP_OPTS=-Xmx1G -Xms1G +%~dp0kafka-run-class.bat kafka.Kafka %* +EndLocal diff --git a/bin/windows/kafka-simple-consumer-shell.bat b/bin/windows/kafka-simple-consumer-shell.bat new file mode 100644 index 0000000000000..4e6ea0c575edb --- /dev/null +++ b/bin/windows/kafka-simple-consumer-shell.bat @@ -0,0 +1,17 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +%~dp0kafka-run-class.bat kafka.tools.SimpleConsumerShell %* diff --git a/bin/windows/kafka-topics.bat b/bin/windows/kafka-topics.bat new file mode 100644 index 0000000000000..f1a9e647877f6 --- /dev/null +++ b/bin/windows/kafka-topics.bat @@ -0,0 +1,17 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +%~dp0kafka-run-class.bat kafka.admin.TopicCommand %* diff --git a/bin/windows/zookeeper-server-start.bat b/bin/windows/zookeeper-server-start.bat index 0dd40ba7bb08c..9836283531468 100644 --- a/bin/windows/zookeeper-server-start.bat +++ b/bin/windows/zookeeper-server-start.bat @@ -15,8 +15,12 @@ rem See the License for the specific language governing permissions and rem limitations under the License. 
IF [%1] EQU [] ( - echo "USAGE: $0 zookeeper.properties" - goto :eof + echo USAGE: %0 zookeeper.properties + EXIT /B 1 ) -kafka-run-class.bat org.apache.zookeeper.server.quorum.QuorumPeerMain %* +SetLocal +set KAFKA_LOG4J_OPTS=-Dlog4j.configuration=file:%~dp0../../config/log4j.properties +set KAFKA_HEAP_OPTS=-Xmx512M -Xms512M +%~dp0kafka-run-class.bat org.apache.zookeeper.server.quorum.QuorumPeerMain %* +EndLocal diff --git a/bin/windows/zookeeper-shell.bat b/bin/windows/zookeeper-shell.bat new file mode 100644 index 0000000000000..e98f069fc57f5 --- /dev/null +++ b/bin/windows/zookeeper-shell.bat @@ -0,0 +1,22 @@ +@echo off +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. + +IF [%1] EQU [] ( + echo USAGE: %0 zookeeper_host:port[/path] [args...] + EXIT /B 1 +) + +%~dp0kafka-run-class.bat org.apache.zookeeper.ZooKeeperMain -server %* diff --git a/bin/zookeeper-server-start.sh b/bin/zookeeper-server-start.sh index 2e7be7486d20a..d96878657cbee 100755 --- a/bin/zookeeper-server-start.sh +++ b/bin/zookeeper-server-start.sh @@ -20,8 +20,14 @@ then exit 1 fi base_dir=$(dirname $0) -export KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:$base_dir/../config/log4j.properties" -export KAFKA_HEAP_OPTS="-Xmx512M -Xms512M" + +if [ "x$KAFKA_LOG4J_OPTS" = "x" ]; then + export KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:$base_dir/../config/log4j.properties" +fi + +if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then + export KAFKA_HEAP_OPTS="-Xmx512M -Xms512M" +fi EXTRA_ARGS="-name zookeeper -loggc" diff --git a/build.gradle b/build.gradle index 858d297b9e8bf..18f86e4c8a106 100644 --- a/build.gradle +++ b/build.gradle @@ -20,6 +20,8 @@ buildscript { apply from: file('gradle/buildscript.gradle'), to: buildscript } +def slf4jlog4j='org.slf4j:slf4j-log4j12:1.7.6' + allprojects { apply plugin: 'idea' repositories { @@ -27,41 +29,107 @@ allprojects { } } +ext { + gradleVersion = "2.0" +} + +def isVerificationRequired(project) { + project.gradle.startParameter.taskNames.any { it.contains("upload") } +} + +apply from: file('wrapper.gradle') apply from: file('gradle/license.gradle') +apply from: file('scala.gradle') subprojects { apply plugin: 'java' apply plugin: 'eclipse' apply plugin: 'maven' - + apply plugin: 'signing' + + licenseTest.onlyIf { isVerificationRequired(project) } + uploadArchives { repositories { - // To test locally, replace mavenUrl in gradle.properties to file://localhost/tmp/myRepo/ - mavenDeployer { - repository(url: "${mavenUrl}") { - authentication(userName: "${mavenUsername}", password: "${mavenPassword}") - } - afterEvaluate { - pom.artifactId = "${archivesBaseName}" - pom.project { - name 'Apache Kafka' - packaging 'jar' - url 'http://kafka.apache.org' - licenses { - license { - name 'The Apache Software License, Version 2.0' - url 
'http://www.apache.org/licenses/LICENSE-2.0.txt' - distribution 'repo' + signing { + if (isVerificationRequired(project)) { + sign configurations.archives + + // To test locally, replace mavenUrl in ~/.gradle/gradle.properties to file://localhost/tmp/myRepo/ + mavenDeployer { + beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) } + repository(url: "${mavenUrl}") { + authentication(userName: "${mavenUsername}", password: "${mavenPassword}") + } + afterEvaluate { + pom.artifactId = "${archivesBaseName}" + pom.project { + name 'Apache Kafka' + packaging 'jar' + url 'http://kafka.apache.org' + licenses { + license { + name 'The Apache Software License, Version 2.0' + url 'http://www.apache.org/licenses/LICENSE-2.0.txt' + distribution 'repo' + } + } + } + } } - } } - } } } } + jar { + from '../LICENSE' + from '../NOTICE' + } + + task srcJar(type:Jar) { + classifier = 'sources' + from '../LICENSE' + from '../NOTICE' + from sourceSets.main.java + } + + task javadocJar(type: Jar, dependsOn: javadoc) { + classifier 'javadoc' + from '../LICENSE' + from '../NOTICE' + from javadoc.destinationDir + } + + task docsJar(dependsOn: javadocJar) + + artifacts { + archives srcJar + archives javadocJar + } + + plugins.withType(ScalaPlugin) { + //source jar should also contain scala source: + srcJar.from sourceSets.main.scala + + task scaladocJar(type:Jar) { + classifier = 'scaladoc' + from '../LICENSE' + from '../NOTICE' + from scaladoc + } + + //documentation task should also trigger building scala doc jar + docsJar.dependsOn scaladocJar + + artifacts { + archives scaladocJar + } + } + tasks.withType(ScalaCompile) { scalaCompileOptions.useAnt = false + configure(scalaCompileOptions.forkOptions) { memoryMaximumSize = '1g' jvmArgs = ['-XX:MaxPermSize=512m'] @@ -69,7 +137,7 @@ subprojects { } } -for ( sv in ['2_8_0', '2_8_2', '2_9_1', '2_9_2', '2_10_1'] ) { +for ( sv in ['2_9_1', '2_9_2', '2_10_4', '2_11'] ) { String svInDot = sv.replaceAll( "_", ".") tasks.create(name: "jar_core_${sv}", type: GradleBuild) { @@ -84,6 +152,18 @@ for ( sv in ['2_8_0', '2_8_2', '2_9_1', '2_9_2', '2_10_1'] ) { startParameter.projectProperties = [scalaVersion: "${svInDot}"] } + tasks.create(name: "srcJar_${sv}", type: GradleBuild) { + buildFile = './build.gradle' + tasks = ['core:srcJar'] + startParameter.projectProperties = [scalaVersion: "${svInDot}"] + } + + tasks.create(name: "docsJar_${sv}", type: GradleBuild) { + buildFile = './build.gradle' + tasks = ['core:docsJar'] + startParameter.projectProperties = [scalaVersion: "${svInDot}"] + } + tasks.create(name: "releaseTarGz_${sv}", type: GradleBuild) { buildFile = './build.gradle' tasks = ['releaseTarGz'] @@ -97,65 +177,54 @@ for ( sv in ['2_8_0', '2_8_2', '2_9_1', '2_9_2', '2_10_1'] ) { } } -tasks.create(name: "jarAll", dependsOn: ['jar_core_2_8_0', 'jar_core_2_8_2', 'jar_core_2_9_1', 'jar_core_2_9_2', 'jar_core_2_10_1', 'clients:jar', 'perf:jar', 'examples:jar', 'contrib:hadoop-consumer:jar', 'contrib:hadoop-producer:jar']) { +tasks.create(name: "jarAll", dependsOn: ['jar_core_2_9_1', 'jar_core_2_9_2', 'jar_core_2_10_4', 'jar_core_2_11', 'clients:jar', 'examples:jar', 'contrib:hadoop-consumer:jar', 'contrib:hadoop-producer:jar']) { } -tasks.create(name: "testAll", dependsOn: ['test_core_2_8_0', 'test_core_2_8_2', 'test_core_2_9_1', 'test_core_2_9_2', 'test_core_2_10_1', 'clients:test']) { +tasks.create(name: "srcJarAll", dependsOn: ['srcJar_2_9_1', 'srcJar_2_9_2', 'srcJar_2_10_4', 'srcJar_2_11', 'clients:srcJar', 'examples:srcJar', 
'contrib:hadoop-consumer:srcJar', 'contrib:hadoop-producer:srcJar']) { } + +tasks.create(name: "docsJarAll", dependsOn: ['docsJar_2_9_1', 'docsJar_2_9_2', 'docsJar_2_10_4', 'docsJar_2_11', 'clients:docsJar', 'examples:docsJar', 'contrib:hadoop-consumer:docsJar', 'contrib:hadoop-producer:docsJar']) { } + +tasks.create(name: "testAll", dependsOn: ['test_core_2_9_1', 'test_core_2_9_2', 'test_core_2_10_4', 'test_core_2_11', 'clients:test']) { } -tasks.create(name: "releaseTarGzAll", dependsOn: ['releaseTarGz_2_8_0', 'releaseTarGz_2_8_2', 'releaseTarGz_2_9_1', 'releaseTarGz_2_9_2', 'releaseTarGz_2_10_1']) { +tasks.create(name: "releaseTarGzAll", dependsOn: ['releaseTarGz_2_9_1', 'releaseTarGz_2_9_2', 'releaseTarGz_2_10_4', 'releaseTarGz_2_11']) { } -tasks.create(name: "uploadArchivesAll", dependsOn: ['uploadCoreArchives_2_8_0', 'uploadCoreArchives_2_8_2', 'uploadCoreArchives_2_9_1', 'uploadCoreArchives_2_9_2', 'uploadCoreArchives_2_10_1', 'perf:uploadArchives', 'examples:uploadArchives', 'contrib:hadoop-consumer:uploadArchives', 'contrib:hadoop-producer:uploadArchives']) { +tasks.create(name: "uploadArchivesAll", dependsOn: ['uploadCoreArchives_2_9_1', 'uploadCoreArchives_2_9_2', 'uploadCoreArchives_2_10_4', 'uploadCoreArchives_2_11', 'clients:uploadArchives', 'examples:uploadArchives', 'contrib:hadoop-consumer:uploadArchives', 'contrib:hadoop-producer:uploadArchives']) { } project(':core') { println "Building project 'core' with Scala version $scalaVersion" apply plugin: 'scala' - archivesBaseName = "kafka_${scalaVersion}" + archivesBaseName = "kafka_${baseScalaVersion}" def (major, minor, trivial) = scalaVersion.tokenize('.') - if(major.toInteger() >= 2 && minor.toInteger() >= 9) { - sourceSets { - main { - scala { - exclude 'kafka/utils/Annotations_2.8.scala' - } - } - } - } else { - sourceSets { - main { - scala { - exclude 'kafka/utils/Annotations_2.9+.scala' - } - } - } - } - dependencies { + compile project(':clients') compile "org.scala-lang:scala-library:$scalaVersion" - compile 'org.apache.zookeeper:zookeeper:3.3.4' + compile 'org.apache.zookeeper:zookeeper:3.4.6' compile 'com.101tec:zkclient:0.3' compile 'com.yammer.metrics:metrics-core:2.2.0' - compile 'com.yammer.metrics:metrics-annotation:2.2.0' compile 'net.sf.jopt-simple:jopt-simple:3.2' - compile 'org.xerial.snappy:snappy-java:1.0.5' testCompile 'junit:junit:4.1' testCompile 'org.easymock:easymock:3.0' testCompile 'org.objenesis:objenesis:1.2' - if (scalaVersion.startsWith('2.8')) { - testCompile 'org.scalatest:scalatest:1.2' - } else if (scalaVersion.startsWith('2.10')) { + testCompile project(':clients') + if (scalaVersion.startsWith('2.10')) { testCompile 'org.scalatest:scalatest_2.10:1.9.1' + } else if (scalaVersion.startsWith('2.11')) { + compile 'org.scala-lang.modules:scala-xml_2.11:1.0.2' + compile 'org.scala-lang.modules:scala-parser-combinators_2.11:1.0.2' + testCompile "org.scalatest:scalatest_2.11:2.2.0" } else { testCompile "org.scalatest:scalatest_$scalaVersion:1.8" - } + } + testRuntime "$slf4jlog4j" - zinc 'com.typesafe.zinc:zinc:0.2.5' + zinc 'com.typesafe.zinc:zinc:0.3.1' } configurations { @@ -166,17 +235,22 @@ project(':core') { compile.exclude module: 'jmxri' compile.exclude module: 'jmxtools' compile.exclude module: 'mail' + compile.exclude module: 'netty' } tasks.create(name: "copyDependantLibs", type: Copy) { + from (configurations.testRuntime) { + include('slf4j-log4j12*') + } + from (configurations.runtime) { + exclude('kafka-clients*') + } into "$buildDir/dependant-libs-${scalaVersion}" - from 
configurations.runtime } tasks.create(name: "releaseTarGz", dependsOn: configurations.archives.artifacts, type: Tar) { - into "." + into "kafka_${baseScalaVersion}-${version}" compression = Compression.GZIP - classifier = 'dist' from(project.file("../bin")) { into "bin/" } from(project.file("../config")) { into "config/" } from '../LICENSE' @@ -185,8 +259,18 @@ project(':core') { from(configurations.archives.artifacts.files) { into("libs/") } } + jar { + dependsOn 'copyDependantLibs' + } + + jar.manifest { + attributes( + 'Version': "${version}" + ) + } + task testJar(type: Jar) { - appendix = 'test' + classifier = 'test' from sourceSets.test.output } @@ -196,20 +280,9 @@ project(':core') { exceptionFormat = 'full' } } -} - -project(':perf') { - println "Building project 'perf' with Scala version $scalaVersion" - - apply plugin: 'scala' - archivesBaseName = "kafka-perf_${scalaVersion}" - - dependencies { - compile project(':core') - compile "org.scala-lang:scala-library:$scalaVersion" - compile 'net.sf.jopt-simple:jopt-simple:3.2' - zinc 'com.typesafe.zinc:zinc:0.2.5' + artifacts { + archives testJar } } @@ -224,7 +297,6 @@ project(':contrib:hadoop-consumer') { compile "org.codehaus.jackson:jackson-core-asl:1.5.5" compile "org.codehaus.jackson:jackson-mapper-asl:1.5.5" compile "org.apache.hadoop:hadoop-core:0.20.2" - compile files('lib/piggybank.jar') } configurations { @@ -243,13 +315,13 @@ project(':contrib:hadoop-producer') { dependencies { compile project(':core') - compile "org.apache.avro:avro:1.4.0" + compile("org.apache.avro:avro:1.4.0") { force = true } compile "org.apache.pig:pig:0.8.0" compile "commons-logging:commons-logging:1.0.4" compile "org.codehaus.jackson:jackson-core-asl:1.5.5" compile "org.codehaus.jackson:jackson-mapper-asl:1.5.5" compile "org.apache.hadoop:hadoop-core:0.20.2" - compile files('lib/piggybank.jar') + compile "org.apache.pig:piggybank:0.12.0" } configurations { @@ -269,17 +341,23 @@ project(':examples') { dependencies { compile project(':core') } + } project(':clients') { archivesBaseName = "kafka-clients" dependencies { + compile "org.slf4j:slf4j-api:1.7.6" + compile 'org.xerial.snappy:snappy-java:1.1.1.6' + compile 'net.jpountz.lz4:lz4:1.2.0' + testCompile 'com.novocode:junit-interface:0.9' + testRuntime "$slf4jlog4j" } task testJar(type: Jar) { - appendix = 'test' + classifier = 'test' from sourceSets.test.output } @@ -289,4 +367,10 @@ project(':clients') { exceptionFormat = 'full' } } + + javadoc { + include "**/org/apache/kafka/clients/producer/*" + include "**/org/apache/kafka/common/errors/*" + } + } diff --git a/clients/build.sbt b/clients/build.sbt deleted file mode 100644 index ca3c8ee3d7e56..0000000000000 --- a/clients/build.sbt +++ /dev/null @@ -1,11 +0,0 @@ -import sbt._ -import Keys._ -import AssemblyKeys._ - -name := "clients" - -libraryDependencies ++= Seq( - "com.novocode" % "junit-interface" % "0.9" % "test" -) - -assemblySettings diff --git a/clients/src/main/java/org/apache/kafka/clients/ClientRequest.java b/clients/src/main/java/org/apache/kafka/clients/ClientRequest.java new file mode 100644 index 0000000000000..d32c319d8ee4c --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/ClientRequest.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. 
The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.clients; + +import org.apache.kafka.common.requests.RequestSend; + +/** + * A request being sent to the server. This holds both the network send as well as the client-level metadata. + */ +public final class ClientRequest { + + private final long createdMs; + private final boolean expectResponse; + private final RequestSend request; + private final Object attachment; + + /** + * @param createdMs The unix timestamp in milliseconds for the time at which this request was created. + * @param expectResponse Should we expect a response message or is this request complete once it is sent? + * @param request The request + * @param attachment Associated data with the request + */ + public ClientRequest(long createdMs, boolean expectResponse, RequestSend request, Object attachment) { + this.createdMs = createdMs; + this.attachment = attachment; + this.request = request; + this.expectResponse = expectResponse; + } + + @Override + public String toString() { + return "ClientRequest(expectResponse=" + expectResponse + ", payload=" + attachment + ", request=" + request + ")"; + } + + public boolean expectResponse() { + return expectResponse; + } + + public RequestSend request() { + return request; + } + + public Object attachment() { + return attachment; + } + + public long createdTime() { + return createdMs; + } + +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/clients/ClientResponse.java b/clients/src/main/java/org/apache/kafka/clients/ClientResponse.java new file mode 100644 index 0000000000000..14ef69afabc4b --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/ClientResponse.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.clients; + +import org.apache.kafka.common.protocol.types.Struct; + +/** + * A response from the server. Contains both the body of the response as well as the correlated request that was + * originally sent. 
+ */ +public class ClientResponse { + + private final long received; + private final boolean disconnected; + private final ClientRequest request; + private final Struct responseBody; + + /** + * @param request The original request + * @param received The unix timestamp when this response was received + * @param disconnected Whether the client disconnected before fully reading a response + * @param responseBody The response contents (or null) if we disconnected or no response was expected + */ + public ClientResponse(ClientRequest request, long received, boolean disconnected, Struct responseBody) { + super(); + this.received = received; + this.disconnected = disconnected; + this.request = request; + this.responseBody = responseBody; + } + + public long receivedTime() { + return received; + } + + public boolean wasDisconnected() { + return disconnected; + } + + public ClientRequest request() { + return request; + } + + public Struct responseBody() { + return responseBody; + } + + public boolean hasResponse() { + return responseBody != null; + } + + public long requestLatencyMs() { + return receivedTime() - this.request.createdTime(); + } + + @Override + public String toString() { + return "ClientResponse(received=" + received + + ", disconnected=" + + disconnected + + ", request=" + + request + + ", responseBody=" + + responseBody + + ")"; + } + +} diff --git a/clients/src/main/java/org/apache/kafka/clients/ClusterConnectionStates.java b/clients/src/main/java/org/apache/kafka/clients/ClusterConnectionStates.java new file mode 100644 index 0000000000000..8aece7e81a804 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/ClusterConnectionStates.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.clients; + +import java.util.HashMap; +import java.util.Map; + +/** + * The state of our connection to each node in the cluster. + * + */ +final class ClusterConnectionStates { + private final long reconnectBackoffMs; + private final Map nodeState; + + public ClusterConnectionStates(long reconnectBackoffMs) { + this.reconnectBackoffMs = reconnectBackoffMs; + this.nodeState = new HashMap(); + } + + /** + * Return true iff we can currently initiate a new connection to the given node. This will be the case if we are not + * connected and haven't been connected for at least the minimum reconnection backoff period. 
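
Stepping back to the two holder classes just shown: ClientRequest and ClientResponse carry no networking logic themselves; a response only keeps a reference to its originating request, so bookkeeping such as latency falls out of the two timestamps. A minimal sketch of that relationship, passing null for the RequestSend and Struct payloads purely to keep the example self-contained:

```java
import org.apache.kafka.clients.ClientRequest;
import org.apache.kafka.clients.ClientResponse;

public class RequestResponseSketch {
    public static void main(String[] args) {
        long sentAt = System.currentTimeMillis();
        // expectResponse = true; no RequestSend body and no attachment for this illustration
        ClientRequest request = new ClientRequest(sentAt, true, null, null);
        // pretend the broker answered 15 ms later and the connection stayed up
        ClientResponse response = new ClientResponse(request, sentAt + 15, false, null);
        System.out.println(response.requestLatencyMs()); // 15
        System.out.println(response.hasResponse());      // false: we supplied no body
    }
}
```
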
+ * @param node The node id to check + * @param now The current time in MS + * @return true if we can initiate a new connection + */ + public boolean canConnect(int node, long now) { + NodeConnectionState state = nodeState.get(node); + if (state == null) + return true; + else + return state.state == ConnectionState.DISCONNECTED && now - state.lastConnectAttemptMs >= this.reconnectBackoffMs; + } + + /** + * Return true if we are disconnected from the given node and can't re-establish a connection yet + * @param node The node to check + * @param now The current time in ms + */ + public boolean isBlackedOut(int node, long now) { + NodeConnectionState state = nodeState.get(node); + if (state == null) + return false; + else + return state.state == ConnectionState.DISCONNECTED && now - state.lastConnectAttemptMs < this.reconnectBackoffMs; + } + + /** + * Returns the number of milliseconds to wait, based on the connection state, before attempting to send data. When + * disconnected, this respects the reconnect backoff time. When connecting or connected, this handles slow/stalled + * connections. + * @param node The node to check + * @param now The current time in ms + */ + public long connectionDelay(int node, long now) { + NodeConnectionState state = nodeState.get(node); + if (state == null) return 0; + long timeWaited = now - state.lastConnectAttemptMs; + if (state.state == ConnectionState.DISCONNECTED) { + return Math.max(this.reconnectBackoffMs - timeWaited, 0); + } + else { + // When connecting or connected, we should be able to delay indefinitely since other events (connection or + // data acked) will cause a wakeup once data can be sent. + return Long.MAX_VALUE; + } + } + + /** + * Enter the connecting state for the given node. + * @param node The id of the node we are connecting to + * @param now The current time. 
+ */ + public void connecting(int node, long now) { + nodeState.put(node, new NodeConnectionState(ConnectionState.CONNECTING, now)); + } + + /** + * Return true iff we have a connection to the give node + * @param node The id of the node to check + */ + public boolean isConnected(int node) { + NodeConnectionState state = nodeState.get(node); + return state != null && state.state == ConnectionState.CONNECTED; + } + + /** + * Return true iff we are in the process of connecting to the given node + * @param node The id of the node + */ + public boolean isConnecting(int node) { + NodeConnectionState state = nodeState.get(node); + return state != null && state.state == ConnectionState.CONNECTING; + } + + /** + * Enter the connected state for the given node + * @param node The node we have connected to + */ + public void connected(int node) { + nodeState(node).state = ConnectionState.CONNECTED; + } + + /** + * Enter the disconnected state for the given node + * @param node The node we have disconnected from + */ + public void disconnected(int node) { + nodeState(node).state = ConnectionState.DISCONNECTED; + } + + /** + * Get the state of our connection to the given state + * @param node The id of the node + * @return The state of our connection + */ + private NodeConnectionState nodeState(int node) { + NodeConnectionState state = this.nodeState.get(node); + if (state == null) + throw new IllegalStateException("No entry found for node " + node); + return state; + } +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/clients/ConnectionState.java b/clients/src/main/java/org/apache/kafka/clients/ConnectionState.java new file mode 100644 index 0000000000000..ab7e3220f9b76 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/ConnectionState.java @@ -0,0 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.clients; + +/** + * The states of a node connection + */ +enum ConnectionState { + DISCONNECTED, CONNECTING, CONNECTED +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/clients/InFlightRequests.java b/clients/src/main/java/org/apache/kafka/clients/InFlightRequests.java new file mode 100644 index 0000000000000..936487b16e7ac --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/InFlightRequests.java @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. 
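
ClusterConnectionStates, completed above, is the entire reconnect-backoff policy: a disconnected node becomes eligible for another attempt only after reconnectBackoffMs has elapsed since the last attempt, and connectionDelay reports the remaining wait. A small sketch of that arithmetic; the 100 ms backoff and node id 7 are arbitrary, and the sketch sits in the same package only because the class is package-private:

```java
package org.apache.kafka.clients; // same package, since ClusterConnectionStates is package-private

public class BackoffSketch {
    public static void main(String[] args) {
        ClusterConnectionStates states = new ClusterConnectionStates(100); // assumed 100 ms backoff
        long t0 = 0;

        System.out.println(states.canConnect(7, t0));            // true: unknown node, never attempted
        states.connecting(7, t0);                                 // record a connection attempt at t = 0
        states.disconnected(7);                                   // ...which immediately fails

        System.out.println(states.canConnect(7, t0 + 50));        // false: only 50 ms since the attempt
        System.out.println(states.isBlackedOut(7, t0 + 50));      // true: still inside the backoff window
        System.out.println(states.connectionDelay(7, t0 + 50));   // 50: remaining backoff
        System.out.println(states.canConnect(7, t0 + 100));       // true: backoff has elapsed
    }
}
```
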
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.clients; + +import java.util.ArrayDeque; +import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; +import java.util.Map; + +/** + * The set of requests which have been sent or are being sent but haven't yet received a response + */ +final class InFlightRequests { + + private final int maxInFlightRequestsPerConnection; + private final Map> requests = new HashMap>(); + + public InFlightRequests(int maxInFlightRequestsPerConnection) { + this.maxInFlightRequestsPerConnection = maxInFlightRequestsPerConnection; + } + + /** + * Add the given request to the queue for the node it was directed to + */ + public void add(ClientRequest request) { + Deque reqs = this.requests.get(request.request().destination()); + if (reqs == null) { + reqs = new ArrayDeque(); + this.requests.put(request.request().destination(), reqs); + } + reqs.addFirst(request); + } + + /** + * Get the request queue for the given node + */ + private Deque requestQueue(int node) { + Deque reqs = requests.get(node); + if (reqs == null || reqs.isEmpty()) + throw new IllegalStateException("Response from server for which there are no in-flight requests."); + return reqs; + } + + /** + * Get the oldest request (the one that that will be completed next) for the given node + */ + public ClientRequest completeNext(int node) { + return requestQueue(node).pollLast(); + } + + /** + * Get the last request we sent to the given node (but don't remove it from the queue) + * @param node The node id + */ + public ClientRequest lastSent(int node) { + return requestQueue(node).peekFirst(); + } + + /** + * Complete the last request that was sent to a particular node. + * @param node The node the request was sent to + * @return The request + */ + public ClientRequest completeLastSent(int node) { + return requestQueue(node).pollFirst(); + } + + /** + * Can we send more requests to this node? + * + * @param node Node in question + * @return true iff we have no requests still being sent to the given node + */ + public boolean canSendMore(int node) { + Deque queue = requests.get(node); + return queue == null || queue.isEmpty() || + (queue.peekFirst().request().completed() && queue.size() < this.maxInFlightRequestsPerConnection); + } + + /** + * Return the number of inflight requests directed at the given node + * @param node The node + * @return The request count. + */ + public int inFlightRequestCount(int node) { + Deque queue = requests.get(node); + return queue == null ? 
0 : queue.size(); + } + + /** + * Count all in-flight requests for all nodes + */ + public int inFlightRequestCount() { + int total = 0; + for (Deque deque : this.requests.values()) + total += deque.size(); + return total; + } + + /** + * Clear out all the in-flight requests for the given node and return them + * + * @param node The node + * @return All the in-flight requests for that node that have been removed + */ + public Iterable clearAll(int node) { + Deque reqs = requests.get(node); + if (reqs == null) { + return Collections.emptyList(); + } else { + return requests.remove(node); + } + } + +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/clients/KafkaClient.java b/clients/src/main/java/org/apache/kafka/clients/KafkaClient.java new file mode 100644 index 0000000000000..397695568d3fd --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/KafkaClient.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.clients; + +import java.util.List; + +import org.apache.kafka.common.Node; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.requests.RequestHeader; + +/** + * The interface for {@link NetworkClient} + */ +public interface KafkaClient { + + /** + * Check if we are currently ready to send another request to the given node but don't attempt to connect if we + * aren't. + * @param node The node to check + * @param now The current timestamp + */ + public boolean isReady(Node node, long now); + + /** + * Initiate a connection to the given node (if necessary), and return true if already connected. The readiness of a + * node will change only when poll is invoked. + * @param node The node to connect to. + * @param now The current time + * @return true iff we are ready to immediately initiate the sending of another request to the given node. + */ + public boolean ready(Node node, long now); + + /** + * Returns the number of milliseconds to wait, based on the connection state, before attempting to send data. When + * disconnected, this respects the reconnect backoff time. When connecting or connected, this handles slow/stalled + * connections. + * @param node The node to check + * @param now The current timestamp + * @return The number of milliseconds to wait. + */ + public long connectionDelay(Node node, long now); + + /** + * Initiate the sending of the given requests and return any completed responses. Requests can only be sent on ready + * connections. 
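
The ordering discipline in InFlightRequests, whose definition just ended, is easy to miss: each new send goes on the head of a per-node deque, so peekFirst and pollFirst act on the most recent send while pollLast completes requests in the order they were sent. A standalone sketch of the same deque discipline using plain strings, since building real ClientRequest objects would require a fully constructed RequestSend:

```java
import java.util.ArrayDeque;
import java.util.Deque;

public class InFlightOrderingSketch {
    public static void main(String[] args) {
        Deque<String> perNodeQueue = new ArrayDeque<String>();
        perNodeQueue.addFirst("request-1");   // sent first
        perNodeQueue.addFirst("request-2");   // sent second
        perNodeQueue.addFirst("request-3");   // sent third

        System.out.println(perNodeQueue.peekFirst()); // request-3 : what lastSent() would return
        System.out.println(perNodeQueue.pollLast());  // request-1 : completeNext() is FIFO
        System.out.println(perNodeQueue.pollFirst()); // request-3 : completeLastSent() pops the newest
    }
}
```
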
+ * @param requests The requests to send + * @param timeout The maximum amount of time to wait for responses in ms + * @param now The current time in ms + * @throws IllegalStateException If a request is sent to an unready node + */ + public List poll(List requests, long timeout, long now); + + /** + * Choose the node with the fewest outstanding requests. This method will prefer a node with an existing connection, + * but will potentially choose a node for which we don't yet have a connection if all existing connections are in + * use. + * @param now The current time in ms + * @return The node with the fewest in-flight requests. + */ + public Node leastLoadedNode(long now); + + /** + * The number of currently in-flight requests for which we have not yet returned a response + */ + public int inFlightRequestCount(); + + /** + * Generate a request header for the next request + * @param key The API key of the request + */ + public RequestHeader nextRequestHeader(ApiKeys key); + + /** + * Wake up the client if it is currently blocked waiting for I/O + */ + public void wakeup(); + + /** + * Close the client and disconnect from all nodes + */ + public void close(); + +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java b/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java new file mode 100644 index 0000000000000..525b95e98010c --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java @@ -0,0 +1,414 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.clients; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.Set; + +import org.apache.kafka.clients.producer.internals.Metadata; +import org.apache.kafka.common.Cluster; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.network.NetworkReceive; +import org.apache.kafka.common.network.NetworkSend; +import org.apache.kafka.common.network.Selectable; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.requests.MetadataRequest; +import org.apache.kafka.common.requests.MetadataResponse; +import org.apache.kafka.common.requests.RequestHeader; +import org.apache.kafka.common.requests.RequestSend; +import org.apache.kafka.common.requests.ResponseHeader; +import org.apache.kafka.common.utils.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A network client for asynchronous request/response network i/o. This is an internal class used to implement the + * user-facing producer and consumer clients. + *
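
The KafkaClient interface shown above is the contract the I/O loop programs against: gate every send on ready(), hand completed ClientRequests to poll(), and use connectionDelay() to decide how long to back off otherwise. A rough sketch of that calling pattern; the generic parameters on poll() were lost in extraction and are assumed here, and the request is taken as already built:

```java
import java.util.Collections;
import java.util.List;

import org.apache.kafka.clients.ClientRequest;
import org.apache.kafka.clients.ClientResponse;
import org.apache.kafka.clients.KafkaClient;
import org.apache.kafka.common.Node;

public class ClientLoopSketch {
    // One iteration of the intended usage: send if the node is ready, otherwise report the backoff.
    static void sendOnce(KafkaClient client, Node node, ClientRequest request, long now) {
        if (client.ready(node, now)) {
            // poll() performs the actual network I/O and returns whatever completed
            List<ClientResponse> responses = client.poll(Collections.singletonList(request), 100, now);
            for (ClientResponse response : responses)
                System.out.println("completed in " + response.requestLatencyMs() + " ms");
        } else {
            // sending to an unready node would raise IllegalStateException, so back off instead
            System.out.println("not ready, retry in " + client.connectionDelay(node, now) + " ms");
        }
    }
}
```
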

+ * This class is not thread-safe! + */ +public class NetworkClient implements KafkaClient { + + private static final Logger log = LoggerFactory.getLogger(NetworkClient.class); + + /* the selector used to perform network i/o */ + private final Selectable selector; + + /* the current cluster metadata */ + private final Metadata metadata; + + /* the state of each node's connection */ + private final ClusterConnectionStates connectionStates; + + /* the set of requests currently being sent or awaiting a response */ + private final InFlightRequests inFlightRequests; + + /* the socket send buffer size in bytes */ + private final int socketSendBuffer; + + /* the socket receive size buffer in bytes */ + private final int socketReceiveBuffer; + + /* the client id used to identify this client in requests to the server */ + private final String clientId; + + /* a random offset to use when choosing nodes to avoid having all nodes choose the same node */ + private final int nodeIndexOffset; + + /* the current correlation id to use when sending requests to servers */ + private int correlation; + + /* true iff there is a metadata request that has been sent and for which we have not yet received a response */ + private boolean metadataFetchInProgress; + + /* the last timestamp when no broker node is available to connect */ + private long lastNoNodeAvailableMs; + + public NetworkClient(Selectable selector, + Metadata metadata, + String clientId, + int maxInFlightRequestsPerConnection, + long reconnectBackoffMs, + int socketSendBuffer, + int socketReceiveBuffer) { + this.selector = selector; + this.metadata = metadata; + this.clientId = clientId; + this.inFlightRequests = new InFlightRequests(maxInFlightRequestsPerConnection); + this.connectionStates = new ClusterConnectionStates(reconnectBackoffMs); + this.socketSendBuffer = socketSendBuffer; + this.socketReceiveBuffer = socketReceiveBuffer; + this.correlation = 0; + this.nodeIndexOffset = new Random().nextInt(Integer.MAX_VALUE); + this.metadataFetchInProgress = false; + this.lastNoNodeAvailableMs = 0; + } + + /** + * Begin connecting to the given node, return true if we are already connected and ready to send to that node. + * @param node The node to check + * @param now The current timestamp + * @return True if we are ready to send to the given node + */ + @Override + public boolean ready(Node node, long now) { + if (isReady(node, now)) + return true; + + if (connectionStates.canConnect(node.id(), now)) + // if we are interested in sending to a node and we don't have a connection to it, initiate one + initiateConnect(node, now); + + return false; + } + + /** + * Returns the number of milliseconds to wait, based on the connection state, before attempting to send data. When + * disconnected, this respects the reconnect backoff time. When connecting or connected, this handles slow/stalled + * connections. + * @param node The node to check + * @param now The current timestamp + * @return The number of milliseconds to wait. + */ + @Override + public long connectionDelay(Node node, long now) { + return connectionStates.connectionDelay(node.id(), now); + } + + /** + * Check if the node with the given id is ready to send more requests. 
+ * @param node The given node id + * @param now The current time in ms + * @return true if the node is ready + */ + @Override + public boolean isReady(Node node, long now) { + int nodeId = node.id(); + if (!this.metadataFetchInProgress && this.metadata.timeToNextUpdate(now) == 0) + // if we need to update our metadata now declare all requests unready to make metadata requests first priority + return false; + else + // otherwise we are ready if we are connected and can send more requests + return isSendable(nodeId); + } + + /** + * Are we connected and ready and able to send more requests to the given node? + * @param node The node + */ + private boolean isSendable(int node) { + return connectionStates.isConnected(node) && inFlightRequests.canSendMore(node); + } + + /** + * Initiate the given requests and check for any new responses, waiting up to the specified time. Requests can only + * be sent for ready nodes. + * @param requests The requests to initiate + * @param timeout The maximum amount of time to wait (in ms) for responses if there are none immediately + * @param now The current time in milliseconds + * @return The list of responses received + */ + @Override + public List poll(List requests, long timeout, long now) { + List sends = new ArrayList(); + + for (int i = 0; i < requests.size(); i++) { + ClientRequest request = requests.get(i); + int nodeId = request.request().destination(); + if (!isSendable(nodeId)) + throw new IllegalStateException("Attempt to send a request to node " + nodeId + " which is not ready."); + + this.inFlightRequests.add(request); + sends.add(request.request()); + } + + // should we update our metadata? + long timeToNextMetadataUpdate = metadata.timeToNextUpdate(now); + long timeToNextReconnectAttempt = this.lastNoNodeAvailableMs + metadata.refreshBackoff() - now; + // if there is no node available to connect, back off refreshing metadata + long metadataTimeout = Math.max(timeToNextMetadataUpdate, timeToNextReconnectAttempt); + if (!this.metadataFetchInProgress && metadataTimeout == 0) + maybeUpdateMetadata(sends, now); + + // do the I/O + try { + this.selector.poll(Math.min(timeout, metadataTimeout), sends); + } catch (IOException e) { + log.error("Unexpected error during I/O in producer network thread", e); + } + + List responses = new ArrayList(); + handleCompletedSends(responses, now); + handleCompletedReceives(responses, now); + handleDisconnections(responses, now); + handleConnections(); + + return responses; + } + + /** + * Get the number of in-flight requests + */ + @Override + public int inFlightRequestCount() { + return this.inFlightRequests.inFlightRequestCount(); + } + + /** + * Generate a request header for the given API key + * @param key The api key + * @return A request header with the appropriate client id and correlation id + */ + @Override + public RequestHeader nextRequestHeader(ApiKeys key) { + return new RequestHeader(key.id, clientId, correlation++); + } + + /** + * Interrupt the client if it is blocked waiting on I/O. + */ + @Override + public void wakeup() { + this.selector.wakeup(); + } + + /** + * Close the network client + */ + @Override + public void close() { + this.selector.close(); + } + + /** + * Choose the node with the fewest outstanding requests which is at least eligible for connection. This method will + * prefer a node with an existing connection, but will potentially choose a node for which we don't yet have a + * connection if all existing connections are in use. 
This method will never choose a node for which there is no + * existing connection and from which we have disconnected within the reconnect backoff period. + * @return The node with the fewest in-flight requests. + */ + public Node leastLoadedNode(long now) { + List nodes = this.metadata.fetch().nodes(); + int inflight = Integer.MAX_VALUE; + Node found = null; + for (int i = 0; i < nodes.size(); i++) { + int idx = Utils.abs((this.nodeIndexOffset + i) % nodes.size()); + Node node = nodes.get(idx); + int currInflight = this.inFlightRequests.inFlightRequestCount(node.id()); + if (currInflight == 0 && this.connectionStates.isConnected(node.id())) { + // if we find an established connection with no in-flight requests we can stop right away + return node; + } else if (!this.connectionStates.isBlackedOut(node.id(), now) && currInflight < inflight) { + // otherwise if this is the best we have found so far, record that + inflight = currInflight; + found = node; + } + } + + return found; + } + + /** + * Handle any completed request send. In particular if no response is expected consider the request complete. + * @param responses The list of responses to update + * @param now The current time + */ + private void handleCompletedSends(List responses, long now) { + // if no response is expected then when the send is completed, return it + for (NetworkSend send : this.selector.completedSends()) { + ClientRequest request = this.inFlightRequests.lastSent(send.destination()); + if (!request.expectResponse()) { + this.inFlightRequests.completeLastSent(send.destination()); + responses.add(new ClientResponse(request, now, false, null)); + } + } + } + + /** + * Handle any completed receives and update the response list with the responses received. + * @param responses The list of responses to update + * @param now The current time + */ + private void handleCompletedReceives(List responses, long now) { + for (NetworkReceive receive : this.selector.completedReceives()) { + int source = receive.source(); + ClientRequest req = inFlightRequests.completeNext(source); + ResponseHeader header = ResponseHeader.parse(receive.payload()); + short apiKey = req.request().header().apiKey(); + Struct body = (Struct) ProtoUtils.currentResponseSchema(apiKey).read(receive.payload()); + correlate(req.request().header(), header); + if (apiKey == ApiKeys.METADATA.id) { + handleMetadataResponse(req.request().header(), body, now); + } else { + // need to add body/header to response here + responses.add(new ClientResponse(req, now, false, body)); + } + } + } + + private void handleMetadataResponse(RequestHeader header, Struct body, long now) { + this.metadataFetchInProgress = false; + MetadataResponse response = new MetadataResponse(body); + Cluster cluster = response.cluster(); + // don't update the cluster if there are no valid nodes...the topic we want may still be in the process of being + // created which means we will get errors and no nodes until it exists + if (cluster.nodes().size() > 0) + this.metadata.update(cluster, now); + else + log.trace("Ignoring empty metadata response with correlation id {}.", header.correlationId()); + } + + /** + * Handle any disconnected connections + * @param responses The list of responses that completed with the disconnection + * @param now The current time + */ + private void handleDisconnections(List responses, long now) { + for (int node : this.selector.disconnected()) { + connectionStates.disconnected(node); + log.debug("Node {} disconnected.", node); + for (ClientRequest request : 
this.inFlightRequests.clearAll(node)) { + log.trace("Cancelled request {} due to node {} being disconnected", request, node); + ApiKeys requestKey = ApiKeys.forId(request.request().header().apiKey()); + if (requestKey == ApiKeys.METADATA) + metadataFetchInProgress = false; + else + responses.add(new ClientResponse(request, now, true, null)); + } + } + // we got a disconnect so we should probably refresh our metadata and see if that broker is dead + if (this.selector.disconnected().size() > 0) + this.metadata.requestUpdate(); + } + + /** + * Record any newly completed connections + */ + private void handleConnections() { + for (Integer id : this.selector.connected()) { + log.debug("Completed connection to node {}", id); + this.connectionStates.connected(id); + } + } + + /** + * Validate that the response corresponds to the request we expect or else explode + */ + private void correlate(RequestHeader requestHeader, ResponseHeader responseHeader) { + if (requestHeader.correlationId() != responseHeader.correlationId()) + throw new IllegalStateException("Correlation id for response (" + responseHeader.correlationId() + + ") does not match request (" + + requestHeader.correlationId() + + ")"); + } + + /** + * Create a metadata request for the given topics + */ + private ClientRequest metadataRequest(long now, int node, Set topics) { + MetadataRequest metadata = new MetadataRequest(new ArrayList(topics)); + RequestSend send = new RequestSend(node, nextRequestHeader(ApiKeys.METADATA), metadata.toStruct()); + return new ClientRequest(now, true, send, null); + } + + /** + * Add a metadata request to the list of sends if we can make one + */ + private void maybeUpdateMetadata(List sends, long now) { + Node node = this.leastLoadedNode(now); + if (node == null) { + log.debug("Give up sending metadata request since no node is available"); + // mark the timestamp for no node available to connect + this.lastNoNodeAvailableMs = now; + return; + } + + log.debug("Trying to send metadata request to node {}", node.id()); + if (connectionStates.isConnected(node.id()) && inFlightRequests.canSendMore(node.id())) { + Set topics = metadata.topics(); + this.metadataFetchInProgress = true; + ClientRequest metadataRequest = metadataRequest(now, node.id(), topics); + log.debug("Sending metadata request {} to node {}", metadataRequest, node.id()); + sends.add(metadataRequest.request()); + this.inFlightRequests.add(metadataRequest); + } else if (connectionStates.canConnect(node.id(), now)) { + // we don't have a connection to this node right now, make one + log.debug("Init connection to node {} for sending metadata request in the next iteration", node.id()); + initiateConnect(node, now); + } + } + + /** + * Initiate a connection to the given node + */ + private void initiateConnect(Node node, long now) { + try { + log.debug("Initiating connection to node {} at {}:{}.", node.id(), node.host(), node.port()); + selector.connect(node.id(), new InetSocketAddress(node.host(), node.port()), this.socketSendBuffer, this.socketReceiveBuffer); + this.connectionStates.connecting(node.id(), now); + } catch (IOException e) { + /* attempt failed, we'll try again after the backoff */ + connectionStates.disconnected(node.id()); + /* maybe the problem is our metadata, update it */ + metadata.requestUpdate(); + log.debug("Error connecting to node {} at {}:{}:", node.id(), node.host(), node.port(), e); + } + } + +} diff --git a/clients/src/main/java/org/apache/kafka/clients/NodeConnectionState.java 
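
One detail of NetworkClient.leastLoadedNode, whose definition just ended, deserves a callout: the scan starts at a per-client random offset (nodeIndexOffset) so that many clients starting together do not all pick the same broker, and it short-circuits on the first connected node with zero in-flight requests. A stripped-down sketch of just that scan over made-up in-flight counts, ignoring the blackout and connected checks:

```java
import java.util.Arrays;
import java.util.List;
import java.util.Random;

public class LeastLoadedSketch {
    public static void main(String[] args) {
        List<Integer> inFlightPerNode = Arrays.asList(3, 0, 5); // made-up counts for nodes 0..2
        int offset = new Random().nextInt(Integer.MAX_VALUE);   // same trick as nodeIndexOffset

        int best = -1, bestInFlight = Integer.MAX_VALUE;
        for (int i = 0; i < inFlightPerNode.size(); i++) {
            int idx = Math.abs((offset + i) % inFlightPerNode.size()); // randomized starting point
            int inFlight = inFlightPerNode.get(idx);
            if (inFlight == 0) { best = idx; break; }                  // an idle node wins immediately
            if (inFlight < bestInFlight) { bestInFlight = inFlight; best = idx; } // else remember best so far
        }
        System.out.println("chose node " + best); // node 1 in this toy example
    }
}
```
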
b/clients/src/main/java/org/apache/kafka/clients/NodeConnectionState.java new file mode 100644 index 0000000000000..752a979ea0b8b --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/NodeConnectionState.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.clients; + +/** + * The state of our connection to a node + */ +final class NodeConnectionState { + + ConnectionState state; + long lastConnectAttemptMs; + + public NodeConnectionState(ConnectionState state, long lastConnectAttempt) { + this.state = state; + this.lastConnectAttemptMs = lastConnectAttempt; + } + + public String toString() { + return "NodeState(" + state + ", " + lastConnectAttemptMs + ")"; + } +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/ByteArrayDeserializer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/ByteArrayDeserializer.java new file mode 100644 index 0000000000000..514cbd2c27a8d --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/ByteArrayDeserializer.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package org.apache.kafka.clients.consumer; + +import java.util.Map; + +public class ByteArrayDeserializer implements Deserializer { + + @Override + public void configure(Map configs) { + // nothing to do + } + + @Override + public byte[] deserialize(String topic, byte[] data, boolean isKey) { + return data; + } + + @Override + public void close() { + // nothing to do + } +} diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/Consumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/Consumer.java new file mode 100644 index 0000000000000..1bce50185273d --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/Consumer.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. 
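
ByteArrayDeserializer above is the identity implementation of the Deserializer hook and the default for both key.deserializer and value.deserializer in ConsumerConfig further down. A custom deserializer only needs the same three methods; here is a sketch for UTF-8 strings, assuming the Deserializer interface (in the same consumer package) carries exactly the signatures that ByteArrayDeserializer implements, with the type parameters that the extraction dropped:

```java
import java.nio.charset.Charset;
import java.util.Map;

import org.apache.kafka.clients.consumer.Deserializer;

public class StringDeserializerSketch implements Deserializer<String> {

    @Override
    public void configure(Map<String, ?> configs) {
        // no configuration needed for this sketch
    }

    @Override
    public String deserialize(String topic, byte[] data, boolean isKey) {
        // decode the raw bytes handed over by the fetcher
        return data == null ? null : new String(data, Charset.forName("UTF-8"));
    }

    @Override
    public void close() {
        // nothing to release
    }
}
```
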
See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. +*/ +package org.apache.kafka.clients.consumer; + +import java.io.Closeable; +import java.util.Collection; +import java.util.Map; + +import org.apache.kafka.common.Metric; +import org.apache.kafka.common.TopicPartition; + +/** + * @see KafkaConsumer + * @see MockConsumer + */ +public interface Consumer extends Closeable { + + /** + * Incrementally subscribe to the given list of topics. This API is mutually exclusive to + * {@link #subscribe(TopicPartition...) subscribe(partitions)} + * @param topics A variable list of topics that the consumer subscribes to + */ + public void subscribe(String...topics); + + /** + * Incrementally subscribes to a specific topic and partition. This API is mutually exclusive to + * {@link #subscribe(String...) subscribe(topics)} + * @param partitions Partitions to subscribe to + */ + public void subscribe(TopicPartition... partitions); + + /** + * Unsubscribe from the specific topics. Messages for this topic will not be returned from the next {@link #poll(long) poll()} + * onwards. This should be used in conjunction with {@link #subscribe(String...) subscribe(topics)}. It is an error to + * unsubscribe from a topic that was never subscribed to using {@link #subscribe(String...) subscribe(topics)} + * @param topics Topics to unsubscribe from + */ + public void unsubscribe(String... topics); + + /** + * Unsubscribe from the specific topic partitions. Messages for these partitions will not be returned from the next + * {@link #poll(long) poll()} onwards. This should be used in conjunction with + * {@link #subscribe(TopicPartition...) subscribe(topic, partitions)}. It is an error to + * unsubscribe from a partition that was never subscribed to using {@link #subscribe(TopicPartition...) subscribe(partitions)} + * @param partitions Partitions to unsubscribe from + */ + public void unsubscribe(TopicPartition... partitions); + + /** + * Fetches data for the subscribed list of topics and partitions + * @param timeout The time, in milliseconds, spent waiting in poll if data is not available. If 0, waits indefinitely. Must not be negative + * @return Map of topic to records for the subscribed topics and partitions as soon as data is available for a topic partition. Availability + * of data is controlled by {@link ConsumerConfig#FETCH_MIN_BYTES_CONFIG} and {@link ConsumerConfig#FETCH_MAX_WAIT_MS_CONFIG}. + * If no data is available for timeout ms, returns an empty list + */ + public Map> poll(long timeout); + + /** + * Commits offsets returned on the last {@link #poll(long) poll()} for the subscribed list of topics and partitions. + * @param sync If true, the commit should block until the consumer receives an acknowledgment + * @return An {@link OffsetMetadata} object that contains the partition, offset and a corresponding error code. 
Returns null + * if the sync flag is set to false + */ + public OffsetMetadata commit(boolean sync); + + /** + * Commits the specified offsets for the specified list of topics and partitions to Kafka. + * @param offsets The map of offsets to commit for the given topic partitions + * @param sync If true, commit will block until the consumer receives an acknowledgment + * @return An {@link OffsetMetadata} object that contains the partition, offset and a corresponding error code. Returns null + * if the sync flag is set to false. + */ + public OffsetMetadata commit(Map offsets, boolean sync); + + /** + * Overrides the fetch positions that the consumer will use on the next fetch request. If the consumer subscribes to a list of topics + * using {@link #subscribe(String...) subscribe(topics)}, an exception will be thrown if the specified topic partition is not owned by + * the consumer. + * @param offsets The map of fetch positions per topic and partition + */ + public void seek(Map offsets); + + /** + * Returns the fetch position of the next message for the specified topic partition to be used on the next {@link #poll(long) poll()} + * @param partitions Partitions for which the fetch position will be returned + * @return The position from which data will be fetched for the specified partition on the next {@link #poll(long) poll()} + */ + public Map position(Collection partitions); + + /** + * Fetches the last committed offsets for the input list of partitions + * @param partitions The list of partitions to return the last committed offset for + * @return The list of offsets for the specified list of partitions + */ + public Map committed(Collection partitions); + + /** + * Fetches offsets before a certain timestamp + * @param timestamp The unix timestamp. Value -1 indicates earliest available timestamp. Value -2 indicates latest available timestamp. + * @param partitions The list of partitions for which the offsets are returned + * @return The offsets for messages that were written to the server before the specified timestamp. + */ + public Map offsetsBeforeTime(long timestamp, Collection partitions); + + /** + * Return a map of metrics maintained by the consumer + */ + public Map metrics(); + + /** + * Close this consumer + */ + public void close(); + +} diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java new file mode 100644 index 0000000000000..1d64f08762b0c --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java @@ -0,0 +1,197 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
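
The Consumer interface, completed above, boils down to subscribe, poll, commit and close for the common case. A rough usage sketch against an already-constructed instance (for example a KafkaConsumer or MockConsumer, both referenced in the javadoc); the generic parameters and the ConsumerRecords return type are assumed, since the extracted diff dropped them:

```java
import java.util.Map;

import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecords;

public class ConsumerUsageSketch {
    // Drain one batch from an already-constructed consumer and commit the new positions synchronously.
    static void pollOnce(Consumer<byte[], byte[]> consumer) {
        consumer.subscribe("events");                                              // hypothetical topic name
        Map<String, ConsumerRecords<byte[], byte[]>> batch = consumer.poll(1000);  // wait up to 1 s for data
        System.out.println("received records for " + batch.size() + " topic(s)");
        consumer.commit(true);                                                     // block until acknowledged
    }
}
```
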
+*/ +package org.apache.kafka.clients.consumer; + +import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; + +import java.util.Map; + +import org.apache.kafka.common.config.AbstractConfig; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.config.ConfigDef.Importance; +import org.apache.kafka.common.config.ConfigDef.Type; + +/** + * The consumer configuration keys + */ +public class ConsumerConfig extends AbstractConfig { + private static final ConfigDef config; + + /** + * The identifier of the group this consumer belongs to. This is required if the consumer uses either the + * group management functionality by using {@link Consumer#subscribe(String...) subscribe(topics)}. This is also required + * if the consumer uses the default Kafka based offset management strategy. + */ + public static final String GROUP_ID_CONFIG = "group.id"; + + /** + * The timeout after which, if the {@link Consumer#poll(long) poll(timeout)} is not invoked, the consumer is + * marked dead and a rebalance operation is triggered for the group identified by {@link #GROUP_ID_CONFIG}. Relevant + * if the consumer uses the group management functionality by invoking {@link Consumer#subscribe(String...) subscribe(topics)} + */ + public static final String SESSION_TIMEOUT_MS = "session.timeout.ms"; + + /** + * The number of times a consumer sends a heartbeat to the co-ordinator broker within a {@link #SESSION_TIMEOUT_MS} time window. + * This frequency affects the latency of a rebalance operation since the co-ordinator broker notifies a consumer of a rebalance + * in the heartbeat response. Relevant if the consumer uses the group management functionality by invoking + * {@link Consumer#subscribe(String...) subscribe(topics)} + */ + public static final String HEARTBEAT_FREQUENCY = "heartbeat.frequency"; + + /** + * A list of URLs to use for establishing the initial connection to the cluster. This list should be in the form + * host1:port1,host2:port2,.... These urls are just used for the initial connection to discover the + * full cluster membership (which may change dynamically) so this list need not contain the full set of servers (you + * may want more than one, though, in case a server is down). + */ + public static final String BOOTSTRAP_SERVERS_CONFIG = "bootstrap.servers"; + + /** + * If true, periodically commit to Kafka the offsets of messages already returned by the consumer. This committed + * offset will be used when the process fails as the position from which the consumption will begin. + */ + public static final String ENABLE_AUTO_COMMIT_CONFIG = "enable.auto.commit"; + + /** + * The friendly name of the partition assignment strategy that the server will use to distribute partition ownership + * amongst consumer instances when group management is used + */ + public static final String PARTITION_ASSIGNMENT_STRATEGY = "partition.assignment.strategy"; + + /** + * The frequency in milliseconds that the consumer offsets are committed to Kafka. Relevant if {@link #ENABLE_AUTO_COMMIT_CONFIG} + * is turned on. + */ + public static final String AUTO_COMMIT_INTERVAL_MS_CONFIG = "auto.commit.interval.ms"; + + /** + * What to do when there is no initial offset in Kafka or if an offset is out of range: + *

    + *
<ul>
+ * <li> smallest: automatically reset the offset to the smallest offset
+ * <li> largest: automatically reset the offset to the largest offset
+ * <li> disable: throw exception to the consumer if no previous offset is found for the consumer's group
+ * <li> anything else: throw exception to the consumer.
+ * </ul>
+ */ + public static final String AUTO_OFFSET_RESET_CONFIG = "auto.offset.reset"; + + /** + * The minimum amount of data the server should return for a fetch request. If insufficient data is available the + * request will wait for that much data to accumulate before answering the request. + */ + public static final String FETCH_MIN_BYTES_CONFIG = "fetch.min.bytes"; + + /** + * The maximum amount of time the server will block before answering the fetch request if there isn't sufficient + * data to immediately satisfy {@link #FETCH_MIN_BYTES_CONFIG}. This should be less than or equal to the timeout used in + * {@link KafkaConsumer#poll(long) poll(timeout)} + */ + public static final String FETCH_MAX_WAIT_MS_CONFIG = "fetch.max.wait.ms"; + + /** + * The maximum amount of time to block waiting to fetch metadata about a topic the first time a record is received + * from that topic. The consumer will throw a TimeoutException if it could not successfully fetch metadata within + * this timeout. + */ + public static final String METADATA_FETCH_TIMEOUT_CONFIG = "metadata.fetch.timeout.ms"; + + /** + * The total memory used by the consumer to buffer records received from the server. This config is meant to control + * the consumer's memory usage, so it is the size of the global fetch buffer that will be shared across all partitions. + */ + public static final String TOTAL_BUFFER_MEMORY_CONFIG = "total.memory.bytes"; + + /** + * The minimum amount of memory that should be used to fetch at least one message for a partition. This puts a lower + * bound on the consumer's memory utilization when there is at least one message for a partition available on the server. + * This size must be at least as large as the maximum message size the server allows or else it is possible for the producer + * to send messages larger than the consumer can fetch. If that happens, the consumer can get stuck trying to fetch a large + * message on a certain partition. + */ + public static final String FETCH_BUFFER_CONFIG = "fetch.buffer.bytes"; + + /** + * The id string to pass to the server when making requests. The purpose of this is to be able to track the source + * of requests beyond just ip/port by allowing a logical application name to be included. + */ + public static final String CLIENT_ID_CONFIG = "client.id"; + + /** + * The size of the TCP send buffer to use when fetching data + */ + public static final String SOCKET_RECEIVE_BUFFER_CONFIG = "socket.receive.buffer.bytes"; + + /** + * The amount of time to wait before attempting to reconnect to a given host. This avoids repeatedly connecting to a + * host in a tight loop. This backoff applies to all requests sent by the consumer to the broker. + */ + public static final String RECONNECT_BACKOFF_MS_CONFIG = "reconnect.backoff.ms"; + + /** metrics.sample.window.ms */ + public static final String METRICS_SAMPLE_WINDOW_MS_CONFIG = "metrics.sample.window.ms"; + private static final String METRICS_SAMPLE_WINDOW_MS_DOC = "The metrics system maintains a configurable number of samples over a fixed window size. This configuration " + "controls the size of the window. For example we might maintain two samples each measured over a 30 second period. 
" + + "When a window expires we erase and overwrite the oldest window."; + + /** metrics.num.samples */ + public static final String METRICS_NUM_SAMPLES_CONFIG = "metrics.num.samples"; + private static final String METRICS_NUM_SAMPLES_DOC = "The number of samples maintained to compute metrics."; + + /** metric.reporters */ + public static final String METRIC_REPORTER_CLASSES_CONFIG = "metric.reporters"; + private static final String METRIC_REPORTER_CLASSES_DOC = "A list of classes to use as metrics reporters. Implementing the MetricReporter interface allows " + "plugging in classes that will be notified of new metric creation. The JmxReporter is always included to register JMX statistics."; + + /** key.deserializer */ + public static final String KEY_DESERIALIZER_CLASS_CONFIG = "key.deserializer"; + private static final String KEY_DESERIALIZER_CLASS_DOC = "Deserializer class for key that implements the Deserializer interface."; + + /** value.deserializer */ + public static final String VALUE_DESERIALIZER_CLASS_CONFIG = "value.deserializer"; + private static final String VALUE_DESERIALIZER_CLASS_DOC = "Deserializer class for value that implements the Deserializer interface."; + + static { + /* TODO: add config docs */ + config = new ConfigDef().define(BOOTSTRAP_SERVERS_CONFIG, Type.LIST, Importance.HIGH, "blah blah") + .define(GROUP_ID_CONFIG, Type.STRING, Importance.HIGH, "blah blah") + .define(SESSION_TIMEOUT_MS, Type.LONG, 1000, Importance.HIGH, "blah blah") + .define(HEARTBEAT_FREQUENCY, Type.INT, 3, Importance.MEDIUM, "blah blah") + .define(PARTITION_ASSIGNMENT_STRATEGY, Type.STRING, Importance.MEDIUM, "blah blah") + .define(METADATA_FETCH_TIMEOUT_CONFIG, Type.LONG, 60 * 1000, atLeast(0), Importance.MEDIUM, "blah blah") + .define(ENABLE_AUTO_COMMIT_CONFIG, Type.BOOLEAN, true, Importance.MEDIUM, "blah blah") + .define(AUTO_COMMIT_INTERVAL_MS_CONFIG, Type.LONG, 5000, atLeast(0), Importance.LOW, "blah blah") + .define(CLIENT_ID_CONFIG, Type.STRING, "", Importance.LOW, "blah blah") + .define(TOTAL_BUFFER_MEMORY_CONFIG, Type.LONG, 32 * 1024 * 1024L, atLeast(0L), Importance.LOW, "blah blah") + .define(FETCH_BUFFER_CONFIG, Type.INT, 1 * 1024 * 1024, atLeast(0), Importance.HIGH, "blah blah") + .define(SOCKET_RECEIVE_BUFFER_CONFIG, Type.INT, 128 * 1024, atLeast(0), Importance.LOW, "blah blah") + .define(FETCH_MIN_BYTES_CONFIG, Type.LONG, 1024, atLeast(0), Importance.HIGH, "blah blah") + .define(FETCH_MAX_WAIT_MS_CONFIG, Type.LONG, 500, atLeast(0), Importance.LOW, "blah blah") + .define(RECONNECT_BACKOFF_MS_CONFIG, Type.LONG, 10L, atLeast(0L), Importance.LOW, "blah blah") + .define(AUTO_OFFSET_RESET_CONFIG, Type.STRING, "largest", Importance.MEDIUM, "blah blah") + .define(METRICS_SAMPLE_WINDOW_MS_CONFIG, + Type.LONG, + 30000, + atLeast(0), + Importance.LOW, + METRICS_SAMPLE_WINDOW_MS_DOC) + .define(METRICS_NUM_SAMPLES_CONFIG, Type.INT, 2, atLeast(1), Importance.LOW, METRICS_NUM_SAMPLES_DOC) + .define(METRIC_REPORTER_CLASSES_CONFIG, Type.LIST, "", Importance.LOW, METRIC_REPORTER_CLASSES_DOC) + .define(KEY_DESERIALIZER_CLASS_CONFIG, Type.CLASS, "org.apache.kafka.clients.consumer.ByteArrayDeserializer", Importance.HIGH, KEY_DESERIALIZER_CLASS_DOC) + .define(VALUE_DESERIALIZER_CLASS_CONFIG, Type.CLASS, "org.apache.kafka.clients.consumer.ByteArrayDeserializer", Importance.HIGH, VALUE_DESERIALIZER_CLASS_DOC); + + } + + ConsumerConfig(Map props) { + super(config, props); + } + +} diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRebalanceCallback.java 
b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRebalanceCallback.java new file mode 100644 index 0000000000000..e4cf7d1cfa01c --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRebalanceCallback.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. +*/ +package org.apache.kafka.clients.consumer; + +import java.util.Collection; + +import org.apache.kafka.common.TopicPartition; + +/** + * A callback interface that the user can implement to manage customized offsets on the start and end of + * every rebalance operation. This callback will execute in the user thread as part of the + * {@link Consumer#poll(long) poll(long)} API on every rebalance attempt. + * Default implementation of the callback will {@link Consumer#seek(java.util.Map) seek(offsets)} to the last committed offsets in the + * {@link #onPartitionsAssigned(Consumer, Collection) onPartitionsAssigned()} callback. And will commit offsets synchronously + * for the specified list of partitions to Kafka in the {@link #onPartitionsRevoked(Consumer, Collection) onPartitionsRevoked()} + * callback. + */ +public interface ConsumerRebalanceCallback { + + /** + * A callback method the user can implement to provide handling of customized offsets on completion of a successful + * rebalance operation. This method will be called after a rebalance operation completes and before the consumer + * starts fetching data. + *

+ * For examples of how to use this API, see the Usage Examples section of {@link KafkaConsumer KafkaConsumer} + * @param partitions The list of partitions that are assigned to the consumer after rebalance + */ + public void onPartitionsAssigned(Consumer consumer, Collection partitions); + + /** + * A callback method the user can implement to provide handling of offset commits to a customized store on the + * start of a rebalance operation. This method will be called before a rebalance operation starts and after the + * consumer stops fetching data. It is recommended that offsets be committed in this callback, to + * either Kafka or a custom offset store, to prevent duplicate data. + *

+ * For examples on usage of this API, see Usage Examples section of {@link KafkaConsumer KafkaConsumer} + * @param partitions The list of partitions that were assigned to the consumer on the last rebalance + */ + public void onPartitionsRevoked(Consumer consumer, Collection partitions); +} diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRecord.java b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRecord.java new file mode 100644 index 0000000000000..16af70a5de52c --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRecord.java @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. +*/ +package org.apache.kafka.clients.consumer; + +import org.apache.kafka.common.TopicPartition; + +/** + * A key/value pair to be received from Kafka. This consists of a topic name and a partition number, from which the + * record is being received and an offset that points to the record in a Kafka partition. + */ +public final class ConsumerRecord { + private final TopicPartition partition; + private final K key; + private final V value; + private final long offset; + private volatile Exception error; + + /** + * Creates a record to be received from a specified topic and partition + * + * @param topic The topic this record is received from + * @param partitionId The partition of the topic this record is received from + * @param key The key of the record, if one exists + * @param value The record contents + * @param offset The offset of this record in the corresponding Kafka partition + */ + public ConsumerRecord(String topic, int partitionId, K key, V value, long offset) { + this(topic, partitionId, key, value, offset, null); + } + + /** + * Create a record with no key + * + * @param topic The topic this record is received from + * @param partitionId The partition of the topic this record is received from + * @param value The record contents + * @param offset The offset of this record in the corresponding Kafka partition + */ + public ConsumerRecord(String topic, int partitionId, V value, long offset) { + this(topic, partitionId, null, value, offset); + } + + /** + * Creates a record with an error code + * @param topic The topic this record is received from + * @param partitionId The partition of the topic this record is received from + * @param error The exception corresponding to the error code returned by the server for this topic partition + */ + public ConsumerRecord(String topic, int partitionId, Exception error) { + this(topic, partitionId, null, null, -1L, error); + } + + private ConsumerRecord(String topic, int partitionId, K key, V value, long offset, Exception error) { + if (topic == null) + throw new IllegalArgumentException("Topic cannot be null"); + this.partition = new 
TopicPartition(topic, partitionId); + this.key = key; + this.value = value; + this.offset = offset; + this.error = error; + } + + /** + * The topic this record is received from + */ + public String topic() { + return partition.topic(); + } + + /** + * The partition from which this record is received + */ + public int partition() { + return partition.partition(); + } + + /** + * The TopicPartition object containing the topic and partition + */ + public TopicPartition topicAndPartition() { + return partition; + } + + /** + * The key (or null if no key is specified) + * @throws Exception The exception thrown while fetching this record. + */ + public K key() throws Exception { + if (this.error != null) + throw this.error; + return key; + } + + /** + * The value + * @throws Exception The exception thrown while fetching this record. + */ + public V value() throws Exception { + if (this.error != null) + throw this.error; + return value; + } + + /** + * The position of this record in the corresponding Kafka partition. + * @throws Exception The exception thrown while fetching this record. + */ + public long offset() throws Exception { + if (this.error != null) + throw this.error; + return offset; + } + + public Exception error() { + return this.error; + } +} diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRecords.java b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRecords.java new file mode 100644 index 0000000000000..bdf4b26942d5a --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRecords.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. +*/ +package org.apache.kafka.clients.consumer; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +/** + * A container that holds the list {@link ConsumerRecord} per partition for a particular topic. There is one for every topic returned by a + * {@link Consumer#poll(long)} operation. + */ +public class ConsumerRecords { + + private final String topic; + private final Map>> recordsPerPartition; + + public ConsumerRecords(String topic, Map>> records) { + this.topic = topic; + this.recordsPerPartition = records; + } + + /** + * @param partitions The input list of partitions for a particular topic. If no partitions are + * specified, returns records for all partitions + * @return The list of {@link ConsumerRecord}s associated with the given partitions. + */ + public List> records(int... 
partitions) { + List> recordsToReturn = new ArrayList>(); + if(partitions.length == 0) { + // return records for all partitions + for(Entry>> record : recordsPerPartition.entrySet()) { + recordsToReturn.addAll(record.getValue()); + } + } else { + for(int partition : partitions) { + List> recordsForThisPartition = recordsPerPartition.get(partition); + recordsToReturn.addAll(recordsForThisPartition); + } + } + return recordsToReturn; + } + + /** + * @return The topic of all records associated with this instance + */ + public String topic() { + return this.topic; + } +} diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/Deserializer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/Deserializer.java new file mode 100644 index 0000000000000..fa857d4debbc9 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/Deserializer.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package org.apache.kafka.clients.consumer; + +import org.apache.kafka.common.Configurable; + +/** + * + * @param Type to be deserialized into. + * + * A class that implements this interface is expected to have a constructor with no parameter. + */ +public interface Deserializer extends Configurable { + /** + * + * @param topic Topic associated with the data + * @param data Serialized bytes + * @param isKey Is data for key or value + * @return deserialized typed data + */ + public T deserialize(String topic, byte[] data, boolean isKey); + + /** + * Close this deserializer + */ + public void close(); +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java new file mode 100644 index 0000000000000..a43b1600c707d --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java @@ -0,0 +1,617 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+*/ +package org.apache.kafka.clients.consumer; + +import org.apache.kafka.common.Metric; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.metrics.JmxReporter; +import org.apache.kafka.common.metrics.MetricConfig; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.MetricsReporter; +import org.apache.kafka.common.utils.ClientUtils; +import org.apache.kafka.common.utils.SystemTime; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.InetSocketAddress; +import java.util.*; + +/** + * A Kafka client that consumes records from a Kafka cluster. + *

+ * The consumer is thread safe and should generally be shared among all threads for best performance. + *

+ * The consumer is single threaded and multiplexes I/O over TCP connections to each of the brokers it + * needs to communicate with. Failure to close the consumer after use will leak these resources. + *

Usage Examples

+ * The consumer APIs offer flexibility to cover a variety of consumption use cases. The following examples demonstrate correct use of + * the available APIs. Each example assumes the presence of a user-implemented process() method that processes a given batch of messages + * and returns the offset of the latest processed message per partition. Note that process() is not part of the consumer API; it is only used here as + * a convenience method to demonstrate the different use cases of the consumer APIs. Here is a sample implementation of such a process() method. + *
+ * {@code
+ * private Map process(Map records) {
+ *     Map processedOffsets = new HashMap();
+ *     for(Entry> recordMetadata : records.entrySet()) {
+ *          List> recordsPerTopic = recordMetadata.getValue().records();
+ *          for(int i = 0;i < recordsPerTopic.size();i++) {
+ *               ConsumerRecord record = recordsPerTopic.get(i);
+ *               // process record
+ *               try {
+ *               	processedOffsets.put(record.topicAndPartition(), record.offset());
+ *               } catch (Exception e) {
+ *               	e.printStackTrace();
+ *               }               
+ *          }
+ *     }
+ *     return processedOffsets; 
+ * }
+ * }
+ * 
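+ * The records() call above returns the records for every partition of the topic. As a minimal variation (assuming,
+ * purely for illustration, that only partitions 0 and 1 of the topic are of interest), the same line can restrict
+ * processing to specific partitions:
+ * {@code
+ * List recordsForSomePartitions = recordMetadata.getValue().records(0, 1);
+ * }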
+ *

+ * This example demonstrates how the consumer can be used to leverage Kafka's group management functionality for automatic consumer load + * balancing and failover. This example assumes that the offsets are stored in Kafka and are automatically committed periodically, + * as controlled by the auto.commit.interval.ms config. + *

+ * {@code  
+ * Properties props = new Properties();
+ * props.put("metadata.broker.list", "localhost:9092");
+ * props.put("group.id", "test");
+ * props.put("session.timeout.ms", "1000");
+ * props.put("enable.auto.commit", "true");
+ * props.put("auto.commit.interval.ms", "10000");
+ * KafkaConsumer consumer = new KafkaConsumer(props);
+ * consumer.subscribe("foo", "bar");
+ * boolean isRunning = true;
+ * while(isRunning) {
+ *   Map> records = consumer.poll(100);
+ *   process(records);
+ * }
+ * consumer.close();
+ * }
+ * 
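+ * If the application later stops consuming one of the topics, that subscription can be dropped on its own and the
+ * next poll() will no longer return records for it. A minimal sketch (the doneWithFoo condition is hypothetical and
+ * would come from application logic):
+ * {@code
+ * if(doneWithFoo)
+ *     consumer.unsubscribe("foo");
+ * }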
+ * This example demonstrates how the consumer can be used to leverage Kafka's group management functionality for automatic consumer load + * balancing and failover. This example assumes that the offsets are stored in Kafka and are manually committed using + * the commit(boolean) API. This example also demonstrates rewinding the consumer's offsets if processing of the consumed + * messages fails. Note that this method of rewinding offsets using {@link #seek(Map) seek(offsets)} is only useful for rewinding the offsets + * of the current consumer instance. As such, this will not trigger a rebalance or affect the fetch offsets for the other consumer instances. + *
+ * {@code  
+ * Properties props = new Properties();
+ * props.put("metadata.broker.list", "localhost:9092");
+ * props.put("group.id", "test");
+ * props.put("session.timeout.ms", "1000");
+ * props.put("enable.auto.commit", "false");
+ * KafkaConsumer consumer = new KafkaConsumer(props);
+ * consumer.subscribe("foo", "bar");
+ * int commitInterval = 100;
+ * int numRecords = 0;
+ * boolean isRunning = true;
+ * Map consumedOffsets = new HashMap();
+ * while(isRunning) {
+ *     Map> records = consumer.poll(100);
+ *     try {
+ *         Map lastConsumedOffsets = process(records);
+ *         consumedOffsets.putAll(lastConsumedOffsets);
+ *         numRecords += records.size();
+ *         // commit offsets for all partitions of topics foo, bar synchronously, owned by this consumer instance
+ *         if(numRecords % commitInterval == 0) 
+ *           consumer.commit(false);
+ *     } catch(Exception e) {
+ *         try {
+ *             // rewind consumer's offsets for failed partitions
+ *             // assume failedPartitions() returns the list of partitions for which the processing of the last batch of messages failed
+ *             List failedPartitions = failedPartitions();   
+ *             Map offsetsToRewindTo = new HashMap();
+ *             for(TopicPartition failedPartition : failedPartitions) {
+ *                 // rewind to the last consumed offset for the failed partition. Since process() failed for this partition, the consumed offset
+ *                 // should still be pointing to the last successfully processed offset and hence is the right offset to rewind consumption to.
+ *                 offsetsToRewindTo.put(failedPartition, consumedOffsets.get(failedPartition));
+ *             }
+ *             // seek to new offsets only for partitions that failed the last process()
+ *             consumer.seek(offsetsToRewindTo);
+ *         } catch(Exception e) {  break; } // rewind failed
+ *     }
+ * }         
+ * consumer.close();
+ * }
+ * 
+ *

+ * This example demonstrates how to rewind the offsets of the entire consumer group. It is assumed that the user has chosen to use Kafka's + * group management functionality for automatic consumer load balancing and failover. This example also assumes that the offsets are stored in + * Kafka. If group management is used, the right place to systematically rewind offsets for every consumer instance is inside the + * ConsumerRebalanceCallback. The onPartitionsAssigned callback is invoked after the consumer is assigned a new set of partitions on rebalance + * and before consumption restarts after the rebalance. This is the right place to supply the newly rewound offsets to the consumer. If you + * foresee ever needing to reset the consumer's offsets while using group management, it is recommended that you always configure the consumer + * with a ConsumerRebalanceCallback that is guarded by a flag controlling whether or not the offset rewind logic runs. + * This method of rewinding offsets is useful if you notice an issue with your message processing after successful consumption and offset commit, + * and you would like to rewind the offsets for the entire consumer group as part of rolling out a fix to your processing logic. In this case, + * you would configure each of your consumer instances with the offset rewind configuration flag turned on and bounce each consumer instance + * in a rolling restart fashion. Each restart triggers a rebalance, and eventually every consumer instance will have rewound the offsets for + * the partitions it owns, effectively rewinding the offsets for the entire consumer group.

+ * {@code  
+ * Properties props = new Properties();
+ * props.put("metadata.broker.list", "localhost:9092");
+ * props.put("group.id", "test");
+ * props.put("session.timeout.ms", "1000");
+ * props.put("enable.auto.commit", "false");
+ * KafkaConsumer consumer = new KafkaConsumer(
+ *                                            props,
+ *                                            new ConsumerRebalanceCallback() {
+ *                                                boolean rewindOffsets = true;  // should be retrieved from external application config
+ *                                                public void onPartitionsAssigned(Consumer consumer, Collection partitions) {
+ *                                                    Map latestCommittedOffsets = consumer.committed(partitions);
+ *                                                    if(rewindOffsets) {
+ *                                                        Map newOffsets = rewindOffsets(latestCommittedOffsets, 100);
+ *                                                        consumer.seek(newOffsets);
+ *                                                    }
+ *                                                }
+ *                                                public void onPartitionsRevoked(Consumer consumer, Collection partitions) {
+ *                                                    consumer.commit(true);
+ *                                                }
+ *                                                // this API rewinds every partition back by numberOfMessagesToRewindBackTo messages 
+ *                                                private Map rewindOffsets(Map currentOffsets,
+ *                                                                                                long numberOfMessagesToRewindBackTo) {
+ *                                                    Map newOffsets = new HashMap();
+ *                                                    for(Map.Entry offset : currentOffsets.entrySet()) 
+ *                                                        newOffsets.put(offset.getKey(), offset.getValue() - numberOfMessagesToRewindBackTo);
+ *                                                    return newOffsets;
+ *                                                }
+ *                                            });
+ * consumer.subscribe("foo", "bar");
+ * int commitInterval = 100;
+ * int numRecords = 0;
+ * boolean isRunning = true;
+ * Map consumedOffsets = new HashMap();
+ * while(isRunning) {
+ *     Map> records = consumer.poll(100);
+ *     Map lastConsumedOffsets = process(records);
+ *     consumedOffsets.putAll(lastConsumedOffsets);
+ *     numRecords += records.size();
+ *     // commit offsets for all partitions of topics foo, bar synchronously, owned by this consumer instance
+ *     if(numRecords % commitInterval == 0) 
+ *         consumer.commit(consumedOffsets, true);
+ * }
+ * consumer.commit(true);
+ * consumer.close();
+ * }
+ * 
+ * This example demonstrates how the consumer can be used to leverage Kafka's group management functionality along with custom offset storage. + * In this example, it is assumed that the user chooses to store the consumer offsets outside Kafka. This requires the user to + * plug in logic for retrieving the offsets from a custom store and to provide those offsets to the consumer in the ConsumerRebalanceCallback + * callback. The onPartitionsAssigned callback is invoked after the consumer is assigned a new set of partitions on rebalance and + * before consumption restarts after the rebalance. This is the right place to supply offsets from a custom store to the consumer. + *

+ * Similarly, the user would also be required to plug in logic for storing the consumer's offsets in a custom store. The onPartitionsRevoked + * callback is invoked right after the consumer has stopped fetching data and before the partition ownership changes. This is the right place + * to commit the offsets for the current set of partitions owned by the consumer. + *

+ * {@code  
+ * Properties props = new Properties();
+ * props.put("metadata.broker.list", "localhost:9092");
+ * props.put("group.id", "test");
+ * props.put("session.timeout.ms", "1000");
+ * props.put("enable.auto.commit", "false"); // since enable.auto.commit only applies to Kafka based offset storage
+ * KafkaConsumer consumer = new KafkaConsumer(
+ *                                            props,
+ *                                            new ConsumerRebalanceCallback() {
+ *                                                public void onPartitionsAssigned(Consumer consumer, Collection partitions) {
+ *                                                    Map lastCommittedOffsets = getLastCommittedOffsetsFromCustomStore(partitions);
+ *                                                    consumer.seek(lastCommittedOffsets);
+ *                                                }
+ *                                                public void onPartitionsRevoked(Consumer consumer, Collection partitions) {
+ *                                                    Map offsets = getLastConsumedOffsets(partitions);
+ *                                                    commitOffsetsToCustomStore(offsets); 
+ *                                                }
+ *                                                // following APIs should be implemented by the user for custom offset management
+ *                                                private Map getLastCommittedOffsetsFromCustomStore(Collection partitions) {
+ *                                                    return null;
+ *                                                }
+ *                                                private Map getLastConsumedOffsets(Collection partitions) { return null; }
+ *                                                private void commitOffsetsToCustomStore(Map offsets) {}
+ *                                            });
+ * consumer.subscribe("foo", "bar");
+ * int commitInterval = 100;
+ * int numRecords = 0;
+ * boolean isRunning = true;
+ * Map consumedOffsets = new HashMap();
+ * while(isRunning) {
+ *     Map> records = consumer.poll(100);
+ *     Map lastConsumedOffsets = process(records);
+ *     consumedOffsets.putAll(lastConsumedOffsets);
+ *     numRecords += records.size();
+ *     // commit offsets for all partitions of topics foo, bar synchronously, owned by this consumer instance
+ *     if(numRecords % commitInterval == 0) 
+ *         commitOffsetsToCustomStore(consumedOffsets);
+ * }
+ * consumer.commit(true);
+ * consumer.close();
+ * }
+ * 
+ * This example demonstrates how the consumer can be used to subscribe to specific partitions of certain topics and consume up to the latest + * available message for each of those partitions before shutting down. When used to subscribe to specific partitions, the user forgoes + * the group management functionality and instead relies on manually configuring the consumer instances to subscribe to a set of partitions. + * This example assumes that the user chooses to use Kafka-based offset storage. The user still has to specify a group.id to use Kafka-based + * offset management. However, session.timeout.ms is not required since the Kafka consumer only does automatic failover when group + * management is used. + *
+ * {@code  
+ * Properties props = new Properties();
+ * props.put("metadata.broker.list", "localhost:9092");
+ * props.put("group.id", "test");
+ * props.put("enable.auto.commit", "true");
+ * props.put("auto.commit.interval.ms", "10000");
+ * KafkaConsumer consumer = new KafkaConsumer(props);
+ * // subscribe to some partitions of topic foo
+ * TopicPartition partition0 = new TopicPartition("foo", 0);
+ * TopicPartition partition1 = new TopicPartition("foo", 1);
+ * TopicPartition[] partitions = new TopicPartition[2];
+ * partitions[0] = partition0;
+ * partitions[1] = partition1;
+ * consumer.subscribe(partitions);
+ * // find the last committed offsets for partitions 0,1 of topic foo
+ * Map lastCommittedOffsets = consumer.committed(Arrays.asList(partitions));
+ * // seek to the last committed offsets to avoid duplicates
+ * consumer.seek(lastCommittedOffsets);        
+ * // find the offsets of the latest available messages to know where to stop consumption
+ * Map latestAvailableOffsets = consumer.offsetsBeforeTime(-2, Arrays.asList(partitions));
+ * boolean isRunning = true;
+ * Map consumedOffsets = new HashMap();
+ * while(isRunning) {
+ *     Map> records = consumer.poll(100);
+ *     Map lastConsumedOffsets = process(records);
+ *     consumedOffsets.putAll(lastConsumedOffsets);
+ *     // keep polling until every subscribed partition has caught up to its latest available offset
+ *     isRunning = false;
+ *     for(TopicPartition partition : partitions) {
+ *         if(consumedOffsets.get(partition) < latestAvailableOffsets.get(partition))
+ *             isRunning = true;
+ *     }
+ * }
+ * consumer.commit(true);
+ * consumer.close();
+ * }
+ * 
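+ * While the loop above tracks progress using the offsets returned by process(), the consumer's own fetch position
+ * can also be inspected directly. A minimal sketch using the position() API described further below:
+ * {@code
+ * Map currentPositions = consumer.position(Arrays.asList(partitions));
+ * }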
+ * This example demonstrates how the consumer can be used to subscribe to specific partitions of certain topics and consume up to the latest + * available message for each of those partitions before shutting down. When used to subscribe to specific partitions, the user forgoes + * the group management functionality and instead relies on manually configuring the consumer instances to subscribe to a set of partitions. + * This example assumes that the user chooses to use custom offset storage. + *
+ * {@code  
+ * Properties props = new Properties();
+ * props.put("metadata.broker.list", "localhost:9092");
+ * KafkaConsumer consumer = new KafkaConsumer(props);
+ * // subscribe to some partitions of topic foo
+ * TopicPartition partition0 = new TopicPartition("foo", 0);
+ * TopicPartition partition1 = new TopicPartition("foo", 1);
+ * TopicPartition[] partitions = new TopicPartition[2];
+ * partitions[0] = partition0;
+ * partitions[1] = partition1;
+ * consumer.subscribe(partitions);
+ * Map lastCommittedOffsets = getLastCommittedOffsetsFromCustomStore();
+ * // seek to the last committed offsets to avoid duplicates
+ * consumer.seek(lastCommittedOffsets);        
+ * // find the offsets of the latest available messages to know where to stop consumption
+ * Map latestAvailableOffsets = consumer.offsetsBeforeTime(-2, Arrays.asList(partitions));
+ * boolean isRunning = true;
+ * Map consumedOffsets = new HashMap();
+ * while(isRunning) {
+ *     Map> records = consumer.poll(100);
+ *     Map lastConsumedOffsets = process(records);
+ *     consumedOffsets.putAll(lastConsumedOffsets);
+ *     // commit offsets for partitions 0,1 for topic foo to custom store
+ *     commitOffsetsToCustomStore(consumedOffsets);
+ *     // keep polling until every subscribed partition has caught up to its latest available offset
+ *     isRunning = false;
+ *     for(TopicPartition partition : partitions) {
+ *         if(consumedOffsets.get(partition) < latestAvailableOffsets.get(partition))
+ *             isRunning = true;
+ *     }
+ * }      
+ * commitOffsetsToCustomStore(consumedOffsets);   
+ * consumer.close();
+ * }
+ * 
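+ * Regardless of which of the above patterns is used, the poll loop can be wrapped so that close() always runs on
+ * shutdown and the consumer's network resources are released. A minimal sketch, reusing the process() helper assumed
+ * earlier:
+ * {@code
+ * try {
+ *     while(isRunning)
+ *         process(consumer.poll(100));
+ * } finally {
+ *     consumer.close();
+ * }
+ * }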
+ */ +public class KafkaConsumer implements Consumer { + + private static final Logger log = LoggerFactory.getLogger(KafkaConsumer.class); + + private final long metadataFetchTimeoutMs; + private final long totalMemorySize; + private final Metrics metrics; + private final Set subscribedTopics; + private final Set subscribedPartitions; + private final Deserializer keyDeserializer; + private final Deserializer valueDeserializer; + + /** + * A consumer is instantiated by providing a set of key-value pairs as configuration. Valid configuration strings + * are documented here. Values can be + * either strings or Objects of the appropriate type (for example a numeric configuration would accept either the + * string "42" or the integer 42). + *

+ * Valid configuration strings are documented at {@link ConsumerConfig} + * @param configs The consumer configs + */ + public KafkaConsumer(Map configs) { + this(new ConsumerConfig(configs), null, null, null); + } + + /** + * A consumer is instantiated by providing a set of key-value pairs as configuration and a {@link ConsumerRebalanceCallback} + * implementation + *

+ * Valid configuration strings are documented at {@link ConsumerConfig} + * @param configs The consumer configs + * @param callback A callback interface that the user can implement to manage customized offsets on the start and end of + * every rebalance operation. + */ + public KafkaConsumer(Map configs, ConsumerRebalanceCallback callback) { + this(new ConsumerConfig(configs), callback, null, null); + } + + /** + * A consumer is instantiated by providing a set of key-value pairs as configuration, a {@link ConsumerRebalanceCallback} + * implementation, a key and a value {@link Deserializer}. + *

+ * Valid configuration strings are documented at {@link ConsumerConfig} + * @param configs The consumer configs + * @param callback A callback interface that the user can implement to manage customized offsets on the start and end of + * every rebalance operation. + * @param keyDeserializer The deserializer for key that implements {@link Deserializer}. The configure() method won't + * be called in the consumer when the deserializer is passed in directly. + * @param valueDeserializer The deserializer for value that implements {@link Deserializer}. The configure() method + * won't be called in the consumer when the deserializer is passed in directly. + */ + public KafkaConsumer(Map configs, ConsumerRebalanceCallback callback, Deserializer keyDeserializer, Deserializer valueDeserializer) { + this(new ConsumerConfig(configs), callback, keyDeserializer, valueDeserializer); + } + + /** + * A consumer is instantiated by providing a {@link java.util.Properties} object as configuration. + * Valid configuration strings are documented at {@link ConsumerConfig} + */ + public KafkaConsumer(Properties properties) { + this(new ConsumerConfig(properties), null, null, null); + } + + /** + * A consumer is instantiated by providing a {@link java.util.Properties} object as configuration and a + * {@link ConsumerRebalanceCallback} implementation. + *

+ * Valid configuration strings are documented at {@link ConsumerConfig} + * @param properties The consumer configuration properties + * @param callback A callback interface that the user can implement to manage customized offsets on the start and end of + * every rebalance operation. + */ + public KafkaConsumer(Properties properties, ConsumerRebalanceCallback callback) { + this(new ConsumerConfig(properties), callback, null, null); + } + + /** + * A consumer is instantiated by providing a {@link java.util.Properties} object as configuration and a + * {@link ConsumerRebalanceCallback} implementation, a key and a value {@link Deserializer}. + *

+ * Valid configuration strings are documented at {@link ConsumerConfig} + * @param properties The consumer configuration properties + * @param callback A callback interface that the user can implement to manage customized offsets on the start and end of + * every rebalance operation. + * @param keyDeserializer The deserializer for key that implements {@link Deserializer}. The configure() method won't + * be called in the consumer when the deserializer is passed in directly. + * @param valueDeserializer The deserializer for value that implements {@link Deserializer}. The configure() method + * won't be called in the consumer when the deserializer is passed in directly. + */ + public KafkaConsumer(Properties properties, ConsumerRebalanceCallback callback, Deserializer keyDeserializer, Deserializer valueDeserializer) { + this(new ConsumerConfig(properties), callback, keyDeserializer, valueDeserializer); + } + + private KafkaConsumer(ConsumerConfig config, ConsumerRebalanceCallback callback, Deserializer keyDeserializer, Deserializer valueDeserializer) { + log.trace("Starting the Kafka consumer"); + subscribedTopics = new HashSet(); + subscribedPartitions = new HashSet(); + this.metrics = new Metrics(new MetricConfig(), + Collections.singletonList((MetricsReporter) new JmxReporter("kafka.consumer.")), + new SystemTime()); + this.metadataFetchTimeoutMs = config.getLong(ConsumerConfig.METADATA_FETCH_TIMEOUT_CONFIG); + this.totalMemorySize = config.getLong(ConsumerConfig.TOTAL_BUFFER_MEMORY_CONFIG); + List addresses = ClientUtils.parseAndValidateAddresses(config.getList(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG)); + + if (keyDeserializer == null) + this.keyDeserializer = config.getConfiguredInstance(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, + Deserializer.class); + else + this.keyDeserializer = keyDeserializer; + if (valueDeserializer == null) + this.valueDeserializer = config.getConfiguredInstance(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, + Deserializer.class); + else + this.valueDeserializer = valueDeserializer; + + config.logUnused(); + log.debug("Kafka consumer started"); + } + + /** + * Incrementally subscribes to the given list of topics and uses the consumer's group management functionality + *

+ * As part of group management, the consumer will keep track of the list of consumers that belong to a particular group and + * will trigger a rebalance operation if one of the following events trigger - + *

    + *
  • Number of partitions change for any of the subscribed list of topics + *
  • Topic is created or deleted + *
  • An existing member of the consumer group dies + *
  • A new member is added to an existing consumer group via the join API + *
+ * @param topics A variable list of topics that the consumer wants to subscribe to + */ + @Override + public void subscribe(String... topics) { + if(subscribedPartitions.size() > 0) + throw new IllegalStateException("Subcription to topics and partitions is mutually exclusive"); + for(String topic:topics) + subscribedTopics.add(topic); + // TODO: trigger a rebalance operation + } + + /** + * Incrementally subscribes to a specific topic partition and does not use the consumer's group management functionality. As such, + * there will be no rebalance operation triggered when group membership or cluster and topic metadata change. + *

+ * @param partitions Partitions to incrementally subscribe to + */ + @Override + public void subscribe(TopicPartition... partitions) { + if(subscribedTopics.size() > 0) + throw new IllegalStateException("Subcription to topics and partitions is mutually exclusive"); + for(TopicPartition partition:partitions) + subscribedPartitions.add(partition); + } + + /** + * Unsubscribe from the specific topics. This will trigger a rebalance operation and messages for this topic will not be returned + * from the next {@link #poll(long) poll()} onwards + * @param topics Topics to unsubscribe from + */ + public void unsubscribe(String... topics) { + // throw an exception if the topic was never subscribed to + for(String topic:topics) { + if(!subscribedTopics.contains(topic)) + throw new IllegalStateException("Topic " + topic + " was never subscribed to. subscribe(" + topic + ") should be called prior" + + " to unsubscribe(" + topic + ")"); + subscribedTopics.remove(topic); + } + // TODO trigger a rebalance operation + } + + /** + * Unsubscribe from the specific topic partitions. Messages for these partitions will not be returned from the next + * {@link #poll(long) poll()} onwards + * @param partitions Partitions to unsubscribe from + */ + public void unsubscribe(TopicPartition... partitions) { + // throw an exception if the partition was never subscribed to + for(TopicPartition partition:partitions) { + if(!subscribedPartitions.contains(partition)) + throw new IllegalStateException("Partition " + partition + " was never subscribed to. subscribe(new TopicPartition(" + + partition.topic() + "," + partition.partition() + ") should be called prior" + + " to unsubscribe(new TopicPartition(" + partition.topic() + "," + partition.partition() + ")"); + subscribedPartitions.remove(partition); + } + // trigger a rebalance operation + } + + /** + * Fetches data for the topics or partitions specified using one of the subscribe APIs. It is an error to not have subscribed to + * any topics or partitions before polling for data. + *

+ * The offset used for fetching the data is governed by whether or not {@link #seek(Map) seek(offsets)} + * is used. If {@link #seek(Map) seek(offsets)} is used, it will use the specified offsets on startup and + * on every rebalance, to consume data from that offset sequentially on every poll. If not, it will use the last checkpointed offset + * using {@link #commit(Map, boolean) commit(offsets, sync)} + * for the subscribed list of partitions. + * @param timeout The time, in milliseconds, spent waiting in poll if data is not available. If 0, waits indefinitely. Must not be negative + * @return map of topic to records since the last fetch for the subscribed list of topics and partitions + */ + @Override + public Map> poll(long timeout) { + // TODO Auto-generated method stub + return null; + } + + /** + * Commits the specified offsets for the specified list of topics and partitions to Kafka. + *

+ * This commits offsets only to Kafka. The offsets committed using this API will be used on the first fetch after every rebalance + * and also on startup. As such, if you need to store offsets in anything other than Kafka, this API should not be used. + * @param offsets The list of offsets per partition that should be committed to Kafka. + * @param sync If true, commit will block until the consumer receives an acknowledgment + * @return An {@link OffsetMetadata} object that contains the partition, offset and a corresponding error code. Returns null + * if the sync flag is set to false. + */ + @Override + public OffsetMetadata commit(Map offsets, boolean sync) { + throw new UnsupportedOperationException(); + } + + /** + * Commits offsets returned on the last {@link #poll(long) poll()} for the subscribed list of topics and + * partitions. + *

+ * This commits offsets only to Kafka. The offsets committed using this API will be used on the first fetch after every rebalance + * and also on startup. As such, if you need to store offsets in anything other than Kafka, this API should not be used. + * @param sync If true, commit will block until the consumer receives an acknowledgment + * @return An {@link OffsetMetadata} object that contains the partition, offset and a corresponding error code. Returns null + * if the sync flag is set to false. + */ + @Override + public OffsetMetadata commit(boolean sync) { + throw new UnsupportedOperationException(); + } + + /** + * Overrides the fetch offsets that the consumer will use on the next {@link #poll(long) poll(timeout)}. If this API is invoked + * for the same partition more than once, the latest offset will be used on the next poll(). Note that you may lose data if this API is + * arbitrarily used in the middle of consumption, to reset the fetch offsets + */ + @Override + public void seek(Map offsets) { + } + + /** + * Returns the fetch position of the next message for the specified topic partition to be used on the next {@link #poll(long) poll()} + * @param partitions Partitions for which the fetch position will be returned + * @return The position from which data will be fetched for the specified partition on the next {@link #poll(long) poll()} + */ + public Map position(Collection partitions) { + return null; + } + + /** + * Fetches the last committed offsets of partitions that the consumer currently consumes. This API is only relevant if Kafka based offset + * storage is used. This API can be used in conjunction with {@link #seek(Map) seek(offsets)} to rewind consumption of data. + * @param partitions The list of partitions to return the last committed offset for + * @return The list of offsets committed on the last {@link #commit(boolean) commit(sync)} + */ + @Override + public Map committed(Collection partitions) { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + + /** + * Fetches offsets before a certain timestamp. Note that the offsets returned are approximately computed and do not correspond to the exact + * message at the given timestamp. As such, if the consumer is rewound to offsets returned by this API, there may be duplicate messages + * returned by the consumer. + * @param partitions The list of partitions for which the offsets are returned + * @param timestamp The unix timestamp. Value -1 indicates earliest available timestamp. Value -2 indicates latest available timestamp. + * @return The offsets per partition before the specified timestamp. + */ + public Map offsetsBeforeTime(long timestamp, Collection partitions) { + return null; + } + + @Override + public Map metrics() { + return Collections.unmodifiableMap(this.metrics.metrics()); + } + + @Override + public void close() { + log.trace("Closing the Kafka consumer."); + subscribedTopics.clear(); + subscribedPartitions.clear(); + this.metrics.close(); + log.debug("The Kafka consumer has closed."); + } +} diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/MockConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/MockConsumer.java new file mode 100644 index 0000000000000..8cab16c0a0bdb --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/MockConsumer.java @@ -0,0 +1,192 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. 
See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. +*/ +package org.apache.kafka.clients.consumer; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectOutputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import org.apache.kafka.common.Metric; +import org.apache.kafka.common.TopicPartition; + +/** + * A mock of the {@link Consumer} interface you can use for testing code that uses Kafka. + * This class is not threadsafe + *

+ * The consumer runs in the user thread and multiplexes I/O over TCP connections to each of the brokers it + * needs to communicate with. Failure to close the consumer after use will leak these resources. + */ +public class MockConsumer implements Consumer { + + private final Set subscribedPartitions; + private final Set subscribedTopics; + private final Map committedOffsets; + private final Map consumedOffsets; + + public MockConsumer() { + subscribedPartitions = new HashSet(); + subscribedTopics = new HashSet(); + committedOffsets = new HashMap(); + consumedOffsets = new HashMap(); + } + + @Override + public void subscribe(String... topics) { + if(subscribedPartitions.size() > 0) + throw new IllegalStateException("Subcription to topics and partitions is mutually exclusive"); + for(String topic : topics) { + subscribedTopics.add(topic); + } + } + + @Override + public void subscribe(TopicPartition... partitions) { + if(subscribedTopics.size() > 0) + throw new IllegalStateException("Subcription to topics and partitions is mutually exclusive"); + for(TopicPartition partition : partitions) { + subscribedPartitions.add(partition); + consumedOffsets.put(partition, 0L); + } + } + + public void unsubscribe(String... topics) { + // throw an exception if the topic was never subscribed to + for(String topic:topics) { + if(!subscribedTopics.contains(topic)) + throw new IllegalStateException("Topic " + topic + " was never subscribed to. subscribe(" + topic + ") should be called prior" + + " to unsubscribe(" + topic + ")"); + subscribedTopics.remove(topic); + } + } + + public void unsubscribe(TopicPartition... partitions) { + // throw an exception if the partition was never subscribed to + for(TopicPartition partition:partitions) { + if(!subscribedPartitions.contains(partition)) + throw new IllegalStateException("Partition " + partition + " was never subscribed to. 
subscribe(new TopicPartition(" + + partition.topic() + "," + partition.partition() + ") should be called prior" + + " to unsubscribe(new TopicPartition(" + partition.topic() + "," + partition.partition() + ")"); + subscribedPartitions.remove(partition); + committedOffsets.remove(partition); + consumedOffsets.remove(partition); + } + } + + @Override + public Map> poll(long timeout) { + // hand out one dummy record, 1 per topic + Map> records = new HashMap>(); + Map> recordMetadata = new HashMap>(); + for(TopicPartition partition : subscribedPartitions) { + // get the last consumed offset + long messageSequence = consumedOffsets.get(partition); + ByteArrayOutputStream byteStream = new ByteArrayOutputStream(); + ObjectOutputStream outputStream; + try { + outputStream = new ObjectOutputStream(byteStream); + outputStream.writeLong(messageSequence++); + outputStream.close(); + } catch (IOException e) { + e.printStackTrace(); + } + List recordsForTopic = records.get(partition.topic()); + if(recordsForTopic == null) { + recordsForTopic = new ArrayList(); + records.put(partition.topic(), recordsForTopic); + } + recordsForTopic.add(new ConsumerRecord(partition.topic(), partition.partition(), null, byteStream.toByteArray(), messageSequence)); + consumedOffsets.put(partition, messageSequence); + } + for(Entry> recordsPerTopic : records.entrySet()) { + Map> recordsPerPartition = new HashMap>(); + for(ConsumerRecord record : recordsPerTopic.getValue()) { + List recordsForThisPartition = recordsPerPartition.get(record.partition()); + if(recordsForThisPartition == null) { + recordsForThisPartition = new ArrayList(); + recordsPerPartition.put(record.partition(), recordsForThisPartition); + } + recordsForThisPartition.add(record); + } + recordMetadata.put(recordsPerTopic.getKey(), new ConsumerRecords(recordsPerTopic.getKey(), recordsPerPartition)); + } + return recordMetadata; + } + + @Override + public OffsetMetadata commit(Map offsets, boolean sync) { + if(!sync) + return null; + for(Entry partitionOffset : offsets.entrySet()) { + committedOffsets.put(partitionOffset.getKey(), partitionOffset.getValue()); + } + return new OffsetMetadata(committedOffsets, null); + } + + @Override + public OffsetMetadata commit(boolean sync) { + if(!sync) + return null; + return commit(consumedOffsets, sync); + } + + @Override + public void seek(Map offsets) { + // change the fetch offsets + for(Entry partitionOffset : offsets.entrySet()) { + consumedOffsets.put(partitionOffset.getKey(), partitionOffset.getValue()); + } + } + + @Override + public Map committed(Collection partitions) { + Map offsets = new HashMap(); + for(TopicPartition partition : partitions) { + offsets.put(new TopicPartition(partition.topic(), partition.partition()), committedOffsets.get(partition)); + } + return offsets; + } + + @Override + public Map position(Collection partitions) { + Map positions = new HashMap(); + for(TopicPartition partition : partitions) { + positions.put(partition, consumedOffsets.get(partition)); + } + return positions; + } + + @Override + public Map offsetsBeforeTime(long timestamp, + Collection partitions) { + throw new UnsupportedOperationException(); + } + + @Override + public Map metrics() { + return null; + } + + @Override + public void close() { + // unsubscribe from all partitions + TopicPartition[] allPartitions = new TopicPartition[subscribedPartitions.size()]; + unsubscribe(subscribedPartitions.toArray(allPartitions)); + } +} diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/OffsetMetadata.java 
b/clients/src/main/java/org/apache/kafka/clients/consumer/OffsetMetadata.java new file mode 100644 index 0000000000000..ea423ad15eebd --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/OffsetMetadata.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.clients.consumer; + +import java.util.Map; + +import org.apache.kafka.common.TopicPartition; + +/** + * The metadata for an offset commit that has been acknowledged by the server + */ +public final class OffsetMetadata { + + private final Map offsets; + private final Map errors; + + public OffsetMetadata(Map offsets, Map errors) { + super(); + this.offsets = offsets; + this.errors = errors; + } + + public OffsetMetadata(Map offsets) { + this(offsets, null); + } + + /** + * The offset of the record in the topic/partition. + */ + public long offset(TopicPartition partition) { + if(this.errors != null) + throw errors.get(partition); + return offsets.get(partition); + } + + /** + * @return The exception corresponding to the error code returned by the server + */ + public Exception error(TopicPartition partition) { + if(errors != null) + return errors.get(partition); + else + return null; + } +} diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/ByteArraySerializer.java b/clients/src/main/java/org/apache/kafka/clients/producer/ByteArraySerializer.java new file mode 100644 index 0000000000000..9005b74a328c9 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/producer/ByteArraySerializer.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ + +package org.apache.kafka.clients.producer; + +import java.util.Map; + +public class ByteArraySerializer implements Serializer { + + @Override + public void configure(Map configs) { + // nothing to do + } + + @Override + public byte[] serialize(String topic, byte[] data, boolean isKey) { + return data; + } + + @Override + public void close() { + // nothing to do + } +} diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java b/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java index 4b2f55650945a..f61efb35db7e0 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java @@ -1,23 +1,18 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
*/ package org.apache.kafka.clients.producer; import java.net.InetSocketAddress; -import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; @@ -26,7 +21,7 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; -import org.apache.kafka.clients.producer.internals.FutureRecordMetadata; +import org.apache.kafka.clients.NetworkClient; import org.apache.kafka.clients.producer.internals.Metadata; import org.apache.kafka.clients.producer.internals.Partitioner; import org.apache.kafka.clients.producer.internals.RecordAccumulator; @@ -37,18 +32,24 @@ import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.common.errors.ApiException; import org.apache.kafka.common.errors.RecordTooLargeException; +import org.apache.kafka.common.errors.TimeoutException; import org.apache.kafka.common.metrics.JmxReporter; import org.apache.kafka.common.metrics.MetricConfig; import org.apache.kafka.common.metrics.Metrics; import org.apache.kafka.common.metrics.MetricsReporter; +import org.apache.kafka.common.metrics.Sensor; import org.apache.kafka.common.network.Selector; import org.apache.kafka.common.record.CompressionType; import org.apache.kafka.common.record.Record; import org.apache.kafka.common.record.Records; +import org.apache.kafka.common.utils.ClientUtils; import org.apache.kafka.common.utils.KafkaThread; import org.apache.kafka.common.utils.SystemTime; - +import org.apache.kafka.common.utils.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A Kafka client that publishes records to the Kafka cluster. @@ -58,7 +59,9 @@ * The producer manages a single background thread that does I/O as well as a TCP connection to each of the brokers it * needs to communicate with. Failure to close the producer after use will leak these resources. */ -public class KafkaProducer implements Producer { +public class KafkaProducer implements Producer { + + private static final Logger log = LoggerFactory.getLogger(KafkaProducer.class); private final Partitioner partitioner; private final int maxRequestSize; @@ -69,82 +72,141 @@ public class KafkaProducer implements Producer { private final Sender sender; private final Metrics metrics; private final Thread ioThread; + private final CompressionType compressionType; + private final Sensor errors; + private final Time time; + private final Serializer keySerializer; + private final Serializer valueSerializer; /** * A producer is instantiated by providing a set of key-value pairs as configuration. Valid configuration strings * are documented here. Values can be * either strings or Objects of the appropriate type (for example a numeric configuration would accept either the * string "42" or the integer 42). + * @param configs The producer configs + * */ public KafkaProducer(Map configs) { - this(new ProducerConfig(configs)); + this(new ProducerConfig(configs), null, null); + } + + /** + * A producer is instantiated by providing a set of key-value pairs as configuration, a key and a value {@link Serializer}. + * Valid configuration strings are documented here. + * Values can be either strings or Objects of the appropriate type (for example a numeric configuration would accept + * either the string "42" or the integer 42). + * @param configs The producer configs + * @param keySerializer The serializer for key that implements {@link Serializer}. 
The configure() method won't be + * called in the producer when the serializer is passed in directly. + * @param valueSerializer The serializer for value that implements {@link Serializer}. The configure() method won't + * be called in the producer when the serializer is passed in directly. + */ + public KafkaProducer(Map configs, Serializer keySerializer, Serializer valueSerializer) { + this(new ProducerConfig(configs), keySerializer, valueSerializer); } /** * A producer is instantiated by providing a set of key-value pairs as configuration. Valid configuration strings * are documented here. + * @param properties The producer configs */ public KafkaProducer(Properties properties) { - this(new ProducerConfig(properties)); + this(new ProducerConfig(properties), null, null); } - private KafkaProducer(ProducerConfig config) { - this.metrics = new Metrics(new MetricConfig(), - Collections.singletonList((MetricsReporter) new JmxReporter("kafka.producer.")), - new SystemTime()); + /** + * A producer is instantiated by providing a set of key-value pairs as configuration, a key and a value {@link Serializer}. + * Valid configuration strings are documented here. + * @param properties The producer configs + * @param keySerializer The serializer for key that implements {@link Serializer}. The configure() method won't be + * called in the producer when the serializer is passed in directly. + * @param valueSerializer The serializer for value that implements {@link Serializer}. The configure() method won't + * be called in the producer when the serializer is passed in directly. + */ + public KafkaProducer(Properties properties, Serializer keySerializer, Serializer valueSerializer) { + this(new ProducerConfig(properties), keySerializer, valueSerializer); + } + + private KafkaProducer(ProducerConfig config, Serializer keySerializer, Serializer valueSerializer) { + log.trace("Starting the Kafka producer"); + this.time = new SystemTime(); + MetricConfig metricConfig = new MetricConfig().samples(config.getInt(ProducerConfig.METRICS_NUM_SAMPLES_CONFIG)) + .timeWindow(config.getLong(ProducerConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG), + TimeUnit.MILLISECONDS); + String clientId = config.getString(ProducerConfig.CLIENT_ID_CONFIG); + String jmxPrefix = "kafka.producer." + (clientId.length() > 0 ? clientId + "." 
: ""); + List reporters = config.getConfiguredInstances(ProducerConfig.METRIC_REPORTER_CLASSES_CONFIG, + MetricsReporter.class); + reporters.add(new JmxReporter(jmxPrefix)); + this.metrics = new Metrics(metricConfig, reporters, time); this.partitioner = new Partitioner(); + long retryBackoffMs = config.getLong(ProducerConfig.RETRY_BACKOFF_MS_CONFIG); this.metadataFetchTimeoutMs = config.getLong(ProducerConfig.METADATA_FETCH_TIMEOUT_CONFIG); - this.metadata = new Metadata(); + this.metadata = new Metadata(retryBackoffMs, config.getLong(ProducerConfig.METADATA_MAX_AGE_CONFIG)); this.maxRequestSize = config.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG); - this.totalMemorySize = config.getLong(ProducerConfig.TOTAL_BUFFER_MEMORY_CONFIG); - this.accumulator = new RecordAccumulator(config.getInt(ProducerConfig.MAX_PARTITION_SIZE_CONFIG), + this.totalMemorySize = config.getLong(ProducerConfig.BUFFER_MEMORY_CONFIG); + this.compressionType = CompressionType.forName(config.getString(ProducerConfig.COMPRESSION_TYPE_CONFIG)); + this.accumulator = new RecordAccumulator(config.getInt(ProducerConfig.BATCH_SIZE_CONFIG), this.totalMemorySize, config.getLong(ProducerConfig.LINGER_MS_CONFIG), - config.getBoolean(ProducerConfig.BLOCK_ON_BUFFER_FULL), + retryBackoffMs, + config.getBoolean(ProducerConfig.BLOCK_ON_BUFFER_FULL_CONFIG), metrics, - new SystemTime()); - List addresses = parseAndValidateAddresses(config.getList(ProducerConfig.BROKER_LIST_CONFIG)); - this.metadata.update(Cluster.bootstrap(addresses), System.currentTimeMillis()); - this.sender = new Sender(new Selector(), + time); + List addresses = ClientUtils.parseAndValidateAddresses(config.getList(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG)); + this.metadata.update(Cluster.bootstrap(addresses), time.milliseconds()); + + NetworkClient client = new NetworkClient(new Selector(this.metrics, time), + this.metadata, + clientId, + config.getInt(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION), + config.getLong(ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG), + config.getInt(ProducerConfig.SEND_BUFFER_CONFIG), + config.getInt(ProducerConfig.RECEIVE_BUFFER_CONFIG)); + this.sender = new Sender(client, this.metadata, this.accumulator, - config.getString(ProducerConfig.CLIENT_ID_CONFIG), config.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG), - config.getLong(ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG), - (short) config.getInt(ProducerConfig.REQUIRED_ACKS_CONFIG), - config.getInt(ProducerConfig.REQUEST_TIMEOUT_CONFIG), + (short) parseAcks(config.getString(ProducerConfig.ACKS_CONFIG)), + config.getInt(ProducerConfig.RETRIES_CONFIG), + config.getInt(ProducerConfig.TIMEOUT_CONFIG), + this.metrics, new SystemTime()); - this.ioThread = new KafkaThread("kafka-network-thread", this.sender, true); + String ioThreadName = "kafka-producer-network-thread" + (clientId.length() > 0 ? 
" | " + clientId : ""); + this.ioThread = new KafkaThread(ioThreadName, this.sender, true); this.ioThread.start(); + + this.errors = this.metrics.sensor("errors"); + + if (keySerializer == null) + this.keySerializer = config.getConfiguredInstance(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, + Serializer.class); + else + this.keySerializer = keySerializer; + if (valueSerializer == null) + this.valueSerializer = config.getConfiguredInstance(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, + Serializer.class); + else + this.valueSerializer = valueSerializer; + + config.logUnused(); + log.debug("Kafka producer started"); } - private static List parseAndValidateAddresses(List urls) { - List addresses = new ArrayList(); - for (String url : urls) { - if (url != null && url.length() > 0) { - String[] pieces = url.split(":"); - if (pieces.length != 2) - throw new ConfigException("Invalid url in metadata.broker.list: " + url); - try { - InetSocketAddress address = new InetSocketAddress(pieces[0], Integer.parseInt(pieces[1])); - if (address.isUnresolved()) - throw new ConfigException("DNS resolution failed for metadata bootstrap url: " + url); - addresses.add(address); - } catch (NumberFormatException e) { - throw new ConfigException("Invalid port in metadata.broker.list: " + url); - } - } + private static int parseAcks(String acksString) { + try { + return acksString.trim().toLowerCase().equals("all") ? -1 : Integer.parseInt(acksString.trim()); + } catch (NumberFormatException e) { + throw new ConfigException("Invalid configuration value for 'acks': " + acksString); } - if (addresses.size() < 1) - throw new ConfigException("No bootstrap urls given in metadata.broker.list."); - return addresses; } /** * Asynchronously send a record to a topic. Equivalent to {@link #send(ProducerRecord, Callback) send(record, null)} + * @param record The record to be sent */ @Override - public Future send(ProducerRecord record) { + public Future send(ProducerRecord record) { return send(record, null); } @@ -166,14 +228,14 @@ public Future send(ProducerRecord record) { * If you want to simulate a simple blocking call you can do the following: * *

-     *   producer.send(new ProducerRecord("the-topic", "key, "value")).get();
+     *   producer.send(new ProducerRecord("the-topic", "key".getBytes(), "value".getBytes())).get();
      * 
*

* Those desiring fully non-blocking usage can make use of the {@link Callback} parameter to provide a callback that * will be invoked when the request is complete. * *

-     *   ProducerRecord record = new ProducerRecord("the-topic", "key, "value");
+     *   ProducerRecord record = new ProducerRecord("the-topic", "key".getBytes(), "value".getBytes());
      *   producer.send(record,
      *                 new Callback() {
      *                     public void onCompletion(RecordMetadata metadata, Exception e) {
@@ -188,8 +250,8 @@ public Future<RecordMetadata> send(ProducerRecord record) {
      * following example callback1 is guaranteed to execute before callback2:
      * 
      * 
-     * producer.send(new ProducerRecord(topic, partition, key, value), callback1);
-     * producer.send(new ProducerRecord(topic, partition, key2, value2), callback2);
+     * producer.send(new ProducerRecord<byte[],byte[]>(topic, partition, key, value), callback1);
+     * producer.send(new ProducerRecord<byte[],byte[]>(topic, partition, key2, value2), callback2);
      * 
*

* Note that callbacks will generally execute in the I/O thread of the producer and so should be reasonably fast or @@ -209,41 +271,87 @@ public Future send(ProducerRecord record) { * indicates no callback) */ @Override - public Future send(ProducerRecord record, Callback callback) { + public Future send(ProducerRecord record, Callback callback) { try { - Cluster cluster = metadata.fetch(record.topic(), this.metadataFetchTimeoutMs); - int partition = partitioner.partition(record, cluster); - ensureValidSize(record.key(), record.value()); + // first make sure the metadata for the topic is available + waitOnMetadata(record.topic(), this.metadataFetchTimeoutMs); + byte[] serializedKey = keySerializer.serialize(record.topic(), record.key(), true); + byte[] serializedValue = valueSerializer.serialize(record.topic(), record.value(), false); + ProducerRecord serializedRecord = new ProducerRecord(record.topic(), record.partition(), serializedKey, serializedValue); + int partition = partitioner.partition(serializedRecord, metadata.fetch()); + int serializedSize = Records.LOG_OVERHEAD + Record.recordSize(serializedKey, serializedValue); + ensureValidRecordSize(serializedSize); TopicPartition tp = new TopicPartition(record.topic(), partition); - FutureRecordMetadata future = accumulator.append(tp, record.key(), record.value(), CompressionType.NONE, callback); - this.sender.wakeup(); - return future; - } catch (Exception e) { + log.trace("Sending record {} with callback {} to topic {} partition {}", record, callback, record.topic(), partition); + RecordAccumulator.RecordAppendResult result = accumulator.append(tp, serializedKey, serializedValue, compressionType, callback); + if (result.batchIsFull || result.newBatchCreated) { + log.trace("Waking up the sender since topic {} partition {} is either full or getting a new batch", record.topic(), partition); + this.sender.wakeup(); + } + return result.future; + // Handling exceptions and record the errors; + // For API exceptions return them in the future, + // for other exceptions throw directly + } catch (ApiException e) { + log.debug("Exception occurred during message send:", e); if (callback != null) callback.onCompletion(null, e); + this.errors.record(); return new FutureFailure(e); + } catch (InterruptedException e) { + this.errors.record(); + throw new KafkaException(e); + } catch (KafkaException e) { + this.errors.record(); + throw e; } } /** - * Check that this key-value pair will have a serialized size small enough + * Wait for cluster metadata including partitions for the given topic to be available. 
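To tie the reworked send() path together, here is a hedged sketch of an asynchronous send with a callback, again assuming byte[] key/value type parameters and a placeholder broker address. As the code above shows, ApiExceptions are reported through the callback and the returned future, while other KafkaExceptions are thrown from send() directly.

```java
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Future;

import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

public class AsyncSendSketch {
    public static void main(String[] args) throws Exception {
        Map<String, Object> configs = new HashMap<String, Object>();
        configs.put("bootstrap.servers", "localhost:9092"); // placeholder

        Producer<byte[], byte[]> producer = new KafkaProducer<byte[], byte[]>(configs);
        ProducerRecord<byte[], byte[]> record =
                new ProducerRecord<byte[], byte[]>("my-topic", "key".getBytes(), "value".getBytes());

        Future<RecordMetadata> future = producer.send(record, new Callback() {
            public void onCompletion(RecordMetadata metadata, Exception exception) {
                if (exception != null)
                    System.err.println("send failed: " + exception); // ApiExceptions surface here
                else
                    System.out.println("acked at offset " + metadata.offset());
            }
        });
        future.get();     // block, for the synchronous style shown in the javadoc
        producer.close();
    }
}
```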
+ * @param topic The topic we want metadata for + * @param maxWaitMs The maximum time in ms for waiting on the metadata */ - private void ensureValidSize(byte[] key, byte[] value) { - int serializedSize = Records.LOG_OVERHEAD + Record.recordSize(key, value); - if (serializedSize > this.maxRequestSize) - throw new RecordTooLargeException("The message is " + serializedSize - + " bytes when serialized which is larger than the maximum request size you have configured with the " - + ProducerConfig.MAX_REQUEST_SIZE_CONFIG - + " configuration."); - if (serializedSize > this.totalMemorySize) - throw new RecordTooLargeException("The message is " + serializedSize - + " bytes when serialized which is larger than the total memory buffer you have configured with the " - + ProducerConfig.TOTAL_BUFFER_MEMORY_CONFIG - + " configuration."); + private void waitOnMetadata(String topic, long maxWaitMs) { + if (metadata.fetch().partitionsForTopic(topic) != null) { + return; + } else { + long begin = time.milliseconds(); + long remainingWaitMs = maxWaitMs; + while (metadata.fetch().partitionsForTopic(topic) == null) { + log.trace("Requesting metadata update for topic {}.", topic); + int version = metadata.requestUpdate(); + metadata.add(topic); + sender.wakeup(); + metadata.awaitUpdate(version, remainingWaitMs); + long elapsed = time.milliseconds() - begin; + if (elapsed >= maxWaitMs) + throw new TimeoutException("Failed to update metadata after " + maxWaitMs + " ms."); + remainingWaitMs = maxWaitMs - elapsed; + } + } + } + + /** + * Validate that the record size isn't too large + */ + private void ensureValidRecordSize(int size) { + if (size > this.maxRequestSize) + throw new RecordTooLargeException("The message is " + size + + " bytes when serialized which is larger than the maximum request size you have configured with the " + + ProducerConfig.MAX_REQUEST_SIZE_CONFIG + + " configuration."); + if (size > this.totalMemorySize) + throw new RecordTooLargeException("The message is " + size + + " bytes when serialized which is larger than the total memory buffer you have configured with the " + + ProducerConfig.BUFFER_MEMORY_CONFIG + + " configuration."); } + @Override public List partitionsFor(String topic) { - return this.metadata.fetch(topic, this.metadataFetchTimeoutMs).partitionsFor(topic); + waitOnMetadata(topic, this.metadataFetchTimeoutMs); + return this.metadata.fetch().partitionsForTopic(topic); } @Override @@ -256,6 +364,7 @@ public List partitionsFor(String topic) { */ @Override public void close() { + log.trace("Closing the Kafka producer."); this.sender.initiateClose(); try { this.ioThread.join(); @@ -263,6 +372,9 @@ public void close() { throw new KafkaException(e); } this.metrics.close(); + this.keySerializer.close(); + this.valueSerializer.close(); + log.debug("The Kafka producer has closed."); } private static class FutureFailure implements Future { diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/MockProducer.java b/clients/src/main/java/org/apache/kafka/clients/producer/MockProducer.java index f43da80580f5a..34624c3b7a1f2 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/MockProducer.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/MockProducer.java @@ -40,11 +40,11 @@ * By default this mock will synchronously complete each send call successfully. However it can be configured to allow * the user to control the completion of the call and supply an optional error for the producer to throw. 
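Two behavioural points from the hunks above deserve a usage note: partitionsFor() now blocks for up to metadata.fetch.timeout.ms (throwing TimeoutException on expiry), and close() joins the I/O thread and, as of this patch, also closes the key and value serializers. A fragment, with producer construction and imports as in the earlier sketches:

```java
try {
    // May block while metadata is fetched; throws TimeoutException after metadata.fetch.timeout.ms.
    List<PartitionInfo> partitions = producer.partitionsFor("my-topic");
    System.out.println("my-topic has " + partitions.size() + " partitions");
} finally {
    // Joins the sender thread, closes metrics and now also the key/value serializers.
    producer.close();
}
```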
*/ -public class MockProducer implements Producer { +public class MockProducer implements Producer { private final Cluster cluster; private final Partitioner partitioner = new Partitioner(); - private final List sent; + private final List> sent; private final Deque completions; private boolean autoComplete; private Map offsets; @@ -62,7 +62,7 @@ public MockProducer(Cluster cluster, boolean autoComplete) { this.cluster = cluster; this.autoComplete = autoComplete; this.offsets = new HashMap(); - this.sent = new ArrayList(); + this.sent = new ArrayList>(); this.completions = new ArrayDeque(); } @@ -90,7 +90,7 @@ public MockProducer() { * @see #history() */ @Override - public synchronized Future send(ProducerRecord record) { + public synchronized Future send(ProducerRecord record) { return send(record, null); } @@ -100,15 +100,15 @@ public synchronized Future send(ProducerRecord record) { * @see #history() */ @Override - public synchronized Future send(ProducerRecord record, Callback callback) { + public synchronized Future send(ProducerRecord record, Callback callback) { int partition = 0; - if (this.cluster.partitionsFor(record.topic()) != null) + if (this.cluster.partitionsForTopic(record.topic()) != null) partition = partitioner.partition(record, this.cluster); ProduceRequestResult result = new ProduceRequestResult(); FutureRecordMetadata future = new FutureRecordMetadata(result, 0); TopicPartition topicPartition = new TopicPartition(record.topic(), partition); long offset = nextOffset(topicPartition); - Completion completion = new Completion(topicPartition, offset, new RecordMetadata(topicPartition, offset), result, callback); + Completion completion = new Completion(topicPartition, offset, new RecordMetadata(topicPartition, 0, offset), result, callback); this.sent.add(record); if (autoComplete) completion.complete(null); @@ -133,7 +133,7 @@ private long nextOffset(TopicPartition tp) { } public List partitionsFor(String topic) { - return this.cluster.partitionsFor(topic); + return this.cluster.partitionsForTopic(topic); } public Map metrics() { @@ -147,8 +147,8 @@ public void close() { /** * Get the list of sent records since the last call to {@link #clear()} */ - public synchronized List history() { - return new ArrayList(this.sent); + public synchronized List> history() { + return new ArrayList>(this.sent); } /** diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/Producer.java b/clients/src/main/java/org/apache/kafka/clients/producer/Producer.java index 36e8398416036..5baa6062bd9ba 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/Producer.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/Producer.java @@ -31,7 +31,7 @@ * @see KafkaProducer * @see MockProducer */ -public interface Producer extends Closeable { +public interface Producer extends Closeable { /** * Send the given record asynchronously and return a future which will eventually contain the response information. 
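The MockProducer changes simply mirror the generic API. A hedged test sketch, assuming the mock is fixed to byte[] keys and values as the history() signature above suggests:

```java
import java.util.List;

import org.apache.kafka.clients.producer.MockProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class MockProducerSketch {
    public static void main(String[] args) {
        MockProducer mock = new MockProducer(); // by default each send completes successfully

        mock.send(new ProducerRecord<byte[], byte[]>("my-topic", "k".getBytes(), "v".getBytes()));

        List<ProducerRecord<byte[], byte[]>> sent = mock.history();
        System.out.println("records sent: " + sent.size()); // 1
        mock.clear();
    }
}
```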
@@ -39,12 +39,12 @@ public interface Producer extends Closeable { * @param record The record to send * @return A future which will eventually contain the response information */ - public Future send(ProducerRecord record); + public Future send(ProducerRecord record); /** * Send a record and invoke the given callback when the record has been acknowledged by the server */ - public Future send(ProducerRecord record, Callback callback); + public Future send(ProducerRecord record, Callback callback); /** * Get a list of partitions for the given topic for custom partition assignment. The partition metadata will change diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java index 502af5cd555dd..a893d88c2f4e2 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java @@ -1,147 +1,244 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.clients.producer; import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; import static org.apache.kafka.common.config.ConfigDef.Range.between; +import static org.apache.kafka.common.config.ConfigDef.ValidString.in; +import java.util.Arrays; import java.util.Map; import org.apache.kafka.common.config.AbstractConfig; import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.config.ConfigDef.Importance; import org.apache.kafka.common.config.ConfigDef.Type; - /** - * The producer configuration keys + * Configuration for the Kafka Producer. Documentation for these configurations can be found in the Kafka documentation */ public class ProducerConfig extends AbstractConfig { - private static final ConfigDef config; - - /** - * A list of URLs to use for establishing the initial connection to the cluster. This list should be in the form - * host1:port1,host2:port2,.... 
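Since Producer is now generic, application code can keep depending on the interface rather than on KafkaProducer directly, which also makes it easy to substitute the MockProducer above in tests. A small sketch with a hypothetical helper class and topic name:

```java
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class PublishHelper {
    // Accepts any Producer implementation, so tests can pass a MockProducer.
    public static void publish(Producer<byte[], byte[]> producer, byte[] key, byte[] value) {
        producer.send(new ProducerRecord<byte[], byte[]>("my-topic", key, value));
    }
}
```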
These urls are just used for the initial connection to discover the - * full cluster membership (which may change dynamically) so this list need not contain the full set of servers (you - * may want more than one, though, in case a server is down). - */ - public static final String BROKER_LIST_CONFIG = "metadata.broker.list"; - - /** - * The amount of time to block waiting to fetch metadata about a topic the first time a record is sent to that - * topic. - */ - public static final String METADATA_FETCH_TIMEOUT_CONFIG = "metadata.fetch.timeout.ms"; - - /** - * The buffer size allocated for a partition. When records are received which are smaller than this size the - * producer will attempt to optimistically group them together until this size is reached. + /* + * NOTE: DO NOT CHANGE EITHER CONFIG STRINGS OR THEIR JAVA VARIABLE NAMES AS THESE ARE PART OF THE PUBLIC API AND + * CHANGE WILL BREAK USER CODE. */ - public static final String MAX_PARTITION_SIZE_CONFIG = "max.partition.bytes"; - /** - * The total memory used by the producer to buffer records waiting to be sent to the server. If records are sent - * faster than they can be delivered to the server the producer will either block or throw an exception based on the - * preference specified by {@link #BLOCK_ON_BUFFER_FULL}. - */ - public static final String TOTAL_BUFFER_MEMORY_CONFIG = "total.memory.bytes"; + private static final ConfigDef config; - /** - * The number of acknowledgments the producer requires from the server before considering a request complete. - */ - public static final String REQUIRED_ACKS_CONFIG = "request.required.acks"; + /** bootstrap.servers */ + public static final String BOOTSTRAP_SERVERS_CONFIG = "bootstrap.servers"; + private static final String BOOSTRAP_SERVERS_DOC = "A list of host/port pairs to use for establishing the initial connection to the Kafka cluster. Data will be load " + "balanced over all servers irrespective of which servers are specified here for bootstrapping—this list only " + + "impacts the initial hosts used to discover the full set of servers. This list should be in the form " + + "host1:port1,host2:port2,.... Since these servers are just used for the initial connection to " + + "discover the full cluster membership (which may change dynamically), this list need not contain the full set of " + + "servers (you may want more than one, though, in case a server is down). If no server in this list is available sending " + + "data will fail until on becomes available."; - /** - * The maximum amount of time the server will wait for acknowledgments from followers to meet the acknowledgment - * requirements the producer has specified. If the requested number of acknowledgments are not met an error will be - * returned. - */ - public static final String REQUEST_TIMEOUT_CONFIG = "request.timeout.ms"; - - /** - * The producer groups together any records that arrive in between request sends. Normally this occurs only under - * load when records arrive faster than they can be sent out. However the client can reduce the number of requests - * and increase throughput by adding a small amount of artificial delay to force more records to batch together. - * This setting gives an upper bound on this delay. If we get {@link #MAX_PARTITION_SIZE_CONFIG} worth of records - * for a partition it will be sent immediately regardless of this setting, however if we have fewer than this many - * bytes accumulated for this partition we will "linger" for the specified time waiting for more records to show up. 
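bootstrap.servers replaces metadata.broker.list; referencing the ProducerConfig constants rather than string literals keeps application code insulated from these renames. A fragment with placeholder host names and an assumed client id:

```java
Map<String, Object> configs = new HashMap<String, Object>();
configs.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "broker1:9092,broker2:9092"); // placeholder hosts
configs.put(ProducerConfig.CLIENT_ID_CONFIG, "my-app"); // appears in logs, metrics and the JMX prefix
Producer<byte[], byte[]> producer = new KafkaProducer<byte[], byte[]>(configs);
```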
- * This setting defaults to 0. - */ + /** metadata.fetch.timeout.ms */ + public static final String METADATA_FETCH_TIMEOUT_CONFIG = "metadata.fetch.timeout.ms"; + private static final String METADATA_FETCH_TIMEOUT_DOC = "The first time data is sent to a topic we must fetch metadata about that topic to know which servers host the " + "topic's partitions. This configuration controls the maximum amount of time we will block waiting for the metadata " + + "fetch to succeed before throwing an exception back to the client."; + + /** metadata.max.age.ms */ + public static final String METADATA_MAX_AGE_CONFIG = "metadata.max.age.ms"; + private static final String METADATA_MAX_AGE_DOC = "The period of time in milliseconds after which we force a refresh of metadata even if we haven't seen any " + " partition leadership changes to proactively discover any new brokers or partitions."; + + /** batch.size */ + public static final String BATCH_SIZE_CONFIG = "batch.size"; + private static final String BATCH_SIZE_DOC = "The producer will attempt to batch records together into fewer requests whenever multiple records are being sent" + " to the same partition. This helps performance on both the client and the server. This configuration controls the " + + "default batch size in bytes. " + + "

" + + "No attempt will be made to batch records larger than this size. " + + "

" + + "Requests sent to brokers will contain multiple batches, one for each partition with data available to be sent. " + + "

" + + "A small batch size will make batching less common and may reduce throughput (a batch size of zero will disable " + + "batching entirely). A very large batch size may use memory a bit more wastefully as we will always allocate a " + + "buffer of the specified batch size in anticipation of additional records."; + + /** buffer.memory */ + public static final String BUFFER_MEMORY_CONFIG = "buffer.memory"; + private static final String BUFFER_MEMORY_DOC = "The total bytes of memory the producer can use to buffer records waiting to be sent to the server. If records are " + "sent faster than they can be delivered to the server the producer will either block or throw an exception based " + + "on the preference specified by block.on.buffer.full. " + + "

" + + "This setting should correspond roughly to the total memory the producer will use, but is not a hard bound since " + + "not all memory the producer uses is used for buffering. Some additional memory will be used for compression (if " + + "compression is enabled) as well as for maintaining in-flight requests."; + + /** acks */ + public static final String ACKS_CONFIG = "acks"; + private static final String ACKS_DOC = "The number of acknowledgments the producer requires the leader to have received before considering a request complete. This controls the " + + " durability of records that are sent. The following settings are common: " + + "

    " + + "
  • acks=0 If set to zero then the producer will not wait for any acknowledgment from the" + + " server at all. The record will be immediately added to the socket buffer and considered sent. No guarantee can be" + + " made that the server has received the record in this case, and the retries configuration will not" + + " take effect (as the client won't generally know of any failures). The offset given back for each record will" + + " always be set to -1." + + "
  • acks=1 This will mean the leader will write the record to its local log but will respond" + + " without awaiting full acknowledgement from all followers. In this case should the leader fail immediately after" + + " acknowledging the record but before the followers have replicated it then the record will be lost." + + "
  • acks=all This means the leader will wait for the full set of in-sync replicas to" + + " acknowledge the record. This guarantees that the record will not be lost as long as at least one in-sync replica" + + " remains alive. This is the strongest available guarantee."; + + /** timeout.ms */ + public static final String TIMEOUT_CONFIG = "timeout.ms"; + private static final String TIMEOUT_DOC = "The configuration controls the maximum amount of time the server will wait for acknowledgments from followers to " + "meet the acknowledgment requirements the producer has specified with the acks configuration. If the " + + "requested number of acknowledgments are not met when the timeout elapses an error will be returned. This timeout " + + "is measured on the server side and does not include the network latency of the request."; + + /** linger.ms */ public static final String LINGER_MS_CONFIG = "linger.ms"; - - /** - * Force a refresh of the cluster metadata after this period of time. This ensures that changes to the number of - * partitions or other settings will by taken up by producers without restart. - */ - public static final String METADATA_REFRESH_MS_CONFIG = "topic.metadata.refresh.interval.ms"; - - /** - * The id string to pass to the server when making requests. The purpose of this is to be able to track the source - * of requests beyond just ip/port by allowing a logical application name to be included. - */ + private static final String LINGER_MS_DOC = "The producer groups together any records that arrive in between request transmissions into a single batched request. " + "Normally this occurs only under load when records arrive faster than they can be sent out. However in some circumstances the client may want to " + + "reduce the number of requests even under moderate load. This setting accomplishes this by adding a small amount " + + "of artificial delay—that is, rather than immediately sending out a record the producer will wait for up to " + + "the given delay to allow other records to be sent so that the sends can be batched together. This can be thought " + + "of as analogous to Nagle's algorithm in TCP. This setting gives the upper bound on the delay for batching: once " + + "we get batch.size worth of records for a partition it will be sent immediately regardless of this " + + "setting, however if we have fewer than this many bytes accumulated for this partition we will 'linger' for the " + + "specified time waiting for more records to show up. This setting defaults to 0 (i.e. no delay). Setting linger.ms=5, " + + "for example, would have the effect of reducing the number of requests sent but would add up to 5ms of latency to records sent in the absense of load."; + + /** client.id */ public static final String CLIENT_ID_CONFIG = "client.id"; + private static final String CLIENT_ID_DOC = "The id string to pass to the server when making requests. The purpose of this is to be able to track the source " + "of requests beyond just ip/port by allowing a logical application name to be included with the request. The " + + "application can set any string it wants as this has no functional purpose other than in logging and metrics."; - /** - * The size of the TCP send buffer to use when sending data - */ + /** send.buffer.bytes */ public static final String SEND_BUFFER_CONFIG = "send.buffer.bytes"; + private static final String SEND_BUFFER_DOC = "The size of the TCP send buffer to use when sending data"; - /** - * The maximum size of a request. 
This is also effectively a cap on the maximum record size. Note that the server - * has its own cap on record size which may be different from this. - */ + /** receive.buffer.bytes */ + public static final String RECEIVE_BUFFER_CONFIG = "receive.buffer.bytes"; + private static final String RECEIVE_BUFFER_DOC = "The size of the TCP receive buffer to use when reading data"; + + /** max.request.size */ public static final String MAX_REQUEST_SIZE_CONFIG = "max.request.size"; + private static final String MAX_REQUEST_SIZE_DOC = "The maximum size of a request. This is also effectively a cap on the maximum record size. Note that the server " + "has its own cap on record size which may be different from this. This setting will limit the number of record " + + "batches the producer will send in a single request to avoid sending huge requests."; - /** - * The amount of time to wait before attempting to reconnect to a given host. This avoids repeated connecting to a - * host in a tight loop. - */ + /** reconnect.backoff.ms */ public static final String RECONNECT_BACKOFF_MS_CONFIG = "reconnect.backoff.ms"; - - /** - * When our memory buffer is exhausted we must either stop accepting new records (block) or throw errors. By default - * this setting is true and we block, however users who want to guarantee we never block can turn this into an - * error. - */ - public static final String BLOCK_ON_BUFFER_FULL = "block.on.buffer.full"; - - public static final String ENABLE_JMX = "enable.jmx"; + private static final String RECONNECT_BACKOFF_MS_DOC = "The amount of time to wait before attempting to reconnect to a given host when a connection fails." + " This avoids a scenario where the client repeatedly attempts to connect to a host in a tight loop."; + + /** block.on.buffer.full */ + public static final String BLOCK_ON_BUFFER_FULL_CONFIG = "block.on.buffer.full"; + private static final String BLOCK_ON_BUFFER_FULL_DOC = "When our memory buffer is exhausted we must either stop accepting new records (block) or throw errors. By default " + "this setting is true and we block, however in some scenarios blocking is not desirable and it is better to " + + "immediately give an error. Setting this to false will accomplish that: the producer will throw a BufferExhaustedException if a recrord is sent and the buffer space is full."; + + /** retries */ + public static final String RETRIES_CONFIG = "retries"; + private static final String RETRIES_DOC = "Setting a value greater than zero will cause the client to resend any record whose send fails with a potentially transient error." + " Note that this retry is no different than if the client resent the record upon receiving the " + + "error. Allowing retries will potentially change the ordering of records because if two records are " + + "sent to a single partition, and the first fails and is retried but the second succeeds, then the second record " + + "may appear first."; + + /** retry.backoff.ms */ + public static final String RETRY_BACKOFF_MS_CONFIG = "retry.backoff.ms"; + private static final String RETRY_BACKOFF_MS_DOC = "The amount of time to wait before attempting to retry a failed produce request to a given topic partition." + " This avoids repeated sending-and-failing in a tight loop."; + + /** compression.type */ + public static final String COMPRESSION_TYPE_CONFIG = "compression.type"; + private static final String COMPRESSION_TYPE_DOC = "The compression type for all data generated by the producer. The default is none (i.e. no compression). 
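The renamed batching and durability keys can be exercised together. A fragment with illustrative values (not recommendations); all constants used here are defined in the new ProducerConfig:

```java
Properties props = new Properties();
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");   // placeholder
props.put(ProducerConfig.ACKS_CONFIG, "all");                           // wait for the full in-sync replica set
props.put(ProducerConfig.RETRIES_CONFIG, "3");                          // retries may reorder records, per the doc above
props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy");            // whole batches are compressed
props.put(ProducerConfig.BATCH_SIZE_CONFIG, "32768");                   // 32 KB per-partition batches
props.put(ProducerConfig.LINGER_MS_CONFIG, "5");                        // wait up to 5 ms to fill a batch
props.put(ProducerConfig.BLOCK_ON_BUFFER_FULL_CONFIG, "false");         // throw BufferExhaustedException instead of blocking
Producer<byte[], byte[]> producer = new KafkaProducer<byte[], byte[]>(props);
```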
Valid " + " values are none, gzip, snappy, or lz4. " + + "Compression is of full batches of data, so the efficacy of batching will also impact the compression ratio (more batching means better compression)."; + + /** metrics.sample.window.ms */ + public static final String METRICS_SAMPLE_WINDOW_MS_CONFIG = "metrics.sample.window.ms"; + private static final String METRICS_SAMPLE_WINDOW_MS_DOC = "The metrics system maintains a configurable number of samples over a fixed window size. This configuration " + "controls the size of the window. For example we might maintain two samples each measured over a 30 second period. " + + "When a window expires we erase and overwrite the oldest window."; + + /** metrics.num.samples */ + public static final String METRICS_NUM_SAMPLES_CONFIG = "metrics.num.samples"; + private static final String METRICS_NUM_SAMPLES_DOC = "The number of samples maintained to compute metrics."; + + /** metric.reporters */ + public static final String METRIC_REPORTER_CLASSES_CONFIG = "metric.reporters"; + private static final String METRIC_REPORTER_CLASSES_DOC = "A list of classes to use as metrics reporters. Implementing the MetricReporter interface allows " + "plugging in classes that will be notified of new metric creation. The JmxReporter is always included to register JMX statistics."; + + /** max.in.flight.requests.per.connection */ + public static final String MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION = "max.in.flight.requests.per.connection"; + private static final String MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION_DOC = "The maximum number of unacknowledged requests the client will send on a single connection before blocking." + + " Note that if this setting is set to be greater than 1 and there are failed sends, there is a risk of" + + " message re-ordering due to retries (i.e., if retries are enabled)."; + + /** key.serializer */ + public static final String KEY_SERIALIZER_CLASS_CONFIG = "key.serializer"; + private static final String KEY_SERIALIZER_CLASS_DOC = "Serializer class for key that implements the Serializer interface."; + + /** value.serializer */ + public static final String VALUE_SERIALIZER_CLASS_CONFIG = "value.serializer"; + private static final String VALUE_SERIALIZER_CLASS_DOC = "Serializer class for value that implements the Serializer interface."; static { - /* TODO: add docs */ - config = new ConfigDef().define(BROKER_LIST_CONFIG, Type.LIST, "blah blah") - .define(METADATA_FETCH_TIMEOUT_CONFIG, Type.LONG, 60 * 1000, atLeast(0), "blah blah") - .define(MAX_PARTITION_SIZE_CONFIG, Type.INT, 16384, atLeast(0), "blah blah") - .define(TOTAL_BUFFER_MEMORY_CONFIG, Type.LONG, 32 * 1024 * 1024L, atLeast(0L), "blah blah") - /* TODO: should be a string to handle acks=in-sync */ - .define(REQUIRED_ACKS_CONFIG, Type.INT, 1, between(-1, Short.MAX_VALUE), "blah blah") - .define(REQUEST_TIMEOUT_CONFIG, Type.INT, 30 * 1000, atLeast(0), "blah blah") - .define(LINGER_MS_CONFIG, Type.LONG, 0, atLeast(0L), "blah blah") - .define(METADATA_REFRESH_MS_CONFIG, Type.LONG, 10 * 60 * 1000, atLeast(-1L), "blah blah") - .define(CLIENT_ID_CONFIG, Type.STRING, "", "blah blah") - .define(SEND_BUFFER_CONFIG, Type.INT, 128 * 1024, atLeast(0), "blah blah") - .define(MAX_REQUEST_SIZE_CONFIG, Type.INT, 1 * 1024 * 1024, atLeast(0), "blah blah") - .define(RECONNECT_BACKOFF_MS_CONFIG, Type.LONG, 10L, atLeast(0L), "blah blah") - .define(BLOCK_ON_BUFFER_FULL, Type.BOOLEAN, true, "blah blah") - .define(ENABLE_JMX, Type.BOOLEAN, true, ""); + config = new ConfigDef().define(BOOTSTRAP_SERVERS_CONFIG, 
Type.LIST, Importance.HIGH, BOOSTRAP_SERVERS_DOC) + .define(BUFFER_MEMORY_CONFIG, Type.LONG, 32 * 1024 * 1024L, atLeast(0L), Importance.HIGH, BUFFER_MEMORY_DOC) + .define(RETRIES_CONFIG, Type.INT, 0, between(0, Integer.MAX_VALUE), Importance.HIGH, RETRIES_DOC) + .define(ACKS_CONFIG, + Type.STRING, + "1", + in(Arrays.asList("all", "-1", "0", "1")), + Importance.HIGH, + ACKS_DOC) + .define(COMPRESSION_TYPE_CONFIG, Type.STRING, "none", Importance.HIGH, COMPRESSION_TYPE_DOC) + .define(BATCH_SIZE_CONFIG, Type.INT, 16384, atLeast(0), Importance.MEDIUM, BATCH_SIZE_DOC) + .define(TIMEOUT_CONFIG, Type.INT, 30 * 1000, atLeast(0), Importance.MEDIUM, TIMEOUT_DOC) + .define(LINGER_MS_CONFIG, Type.LONG, 0, atLeast(0L), Importance.MEDIUM, LINGER_MS_DOC) + .define(CLIENT_ID_CONFIG, Type.STRING, "", Importance.MEDIUM, CLIENT_ID_DOC) + .define(SEND_BUFFER_CONFIG, Type.INT, 128 * 1024, atLeast(0), Importance.MEDIUM, SEND_BUFFER_DOC) + .define(RECEIVE_BUFFER_CONFIG, Type.INT, 32 * 1024, atLeast(0), Importance.MEDIUM, RECEIVE_BUFFER_DOC) + .define(MAX_REQUEST_SIZE_CONFIG, + Type.INT, + 1 * 1024 * 1024, + atLeast(0), + Importance.MEDIUM, + MAX_REQUEST_SIZE_DOC) + .define(BLOCK_ON_BUFFER_FULL_CONFIG, Type.BOOLEAN, true, Importance.LOW, BLOCK_ON_BUFFER_FULL_DOC) + .define(RECONNECT_BACKOFF_MS_CONFIG, Type.LONG, 10L, atLeast(0L), Importance.LOW, RECONNECT_BACKOFF_MS_DOC) + .define(METRIC_REPORTER_CLASSES_CONFIG, Type.LIST, "", Importance.LOW, METRIC_REPORTER_CLASSES_DOC) + .define(RETRY_BACKOFF_MS_CONFIG, Type.LONG, 100L, atLeast(0L), Importance.LOW, RETRY_BACKOFF_MS_DOC) + .define(METADATA_FETCH_TIMEOUT_CONFIG, + Type.LONG, + 60 * 1000, + atLeast(0), + Importance.LOW, + METADATA_FETCH_TIMEOUT_DOC) + .define(METADATA_MAX_AGE_CONFIG, Type.LONG, 5 * 60 * 1000, atLeast(0), Importance.LOW, METADATA_MAX_AGE_DOC) + .define(METRICS_SAMPLE_WINDOW_MS_CONFIG, + Type.LONG, + 30000, + atLeast(0), + Importance.LOW, + METRICS_SAMPLE_WINDOW_MS_DOC) + .define(METRICS_NUM_SAMPLES_CONFIG, Type.INT, 2, atLeast(1), Importance.LOW, METRICS_NUM_SAMPLES_DOC) + .define(MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, + Type.INT, + 5, + atLeast(1), + Importance.LOW, + MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION_DOC) + .define(KEY_SERIALIZER_CLASS_CONFIG, Type.CLASS, "org.apache.kafka.clients.producer.ByteArraySerializer", Importance.HIGH, KEY_SERIALIZER_CLASS_DOC) + .define(VALUE_SERIALIZER_CLASS_CONFIG, Type.CLASS, "org.apache.kafka.clients.producer.ByteArraySerializer", Importance.HIGH, VALUE_SERIALIZER_CLASS_DOC); } ProducerConfig(Map props) { super(config, props); } + public static void main(String[] args) { + System.out.println(config.toHtmlTable()); + } + } diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerRecord.java b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerRecord.java index 034bf33385fe3..065d4e6c6a496 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerRecord.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerRecord.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.clients.producer; @@ -24,12 +20,12 @@ * specified but a key is present a partition will be chosen using a hash of the key. If neither key nor partition is * present a partition will be assigned in a round-robin fashion. */ -public final class ProducerRecord { +public final class ProducerRecord { private final String topic; private final Integer partition; - private final byte[] key; - private final byte[] value; + private final K key; + private final V value; /** * Creates a record to be sent to a specified topic and partition @@ -39,7 +35,7 @@ public final class ProducerRecord { * @param key The key that will be included in the record * @param value The record contents */ - public ProducerRecord(String topic, Integer partition, byte[] key, byte[] value) { + public ProducerRecord(String topic, Integer partition, K key, V value) { if (topic == null) throw new IllegalArgumentException("Topic cannot be null"); this.topic = topic; @@ -55,7 +51,7 @@ public ProducerRecord(String topic, Integer partition, byte[] key, byte[] value) * @param key The key that will be included in the record * @param value The record contents */ - public ProducerRecord(String topic, byte[] key, byte[] value) { + public ProducerRecord(String topic, K key, V value) { this(topic, null, key, value); } @@ -65,7 +61,7 @@ public ProducerRecord(String topic, byte[] key, byte[] value) { * @param topic The topic this record should be sent to * @param value The record contents */ - public ProducerRecord(String topic, byte[] value) { + public ProducerRecord(String topic, V value) { this(topic, null, value); } @@ -79,14 +75,14 @@ public String topic() { /** * The key (or null if no key is specified) */ - public byte[] key() { + public K key() { return key; } /** * @return The value */ - public byte[] value() { + public V value() { return value; } @@ -97,4 +93,10 @@ public Integer partition() { return partition; } + @Override + public String toString() { + String key = this.key == null ? "null" : this.key.toString(); + String value = this.value == null ? 
"null" : this.value.toString(); + return "ProducerRecord(topic=" + topic + ", partition=" + partition + ", key=" + key + ", value=" + value; + } } diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/RecordMetadata.java b/clients/src/main/java/org/apache/kafka/clients/producer/RecordMetadata.java index 8c776980ef1f5..8015f0da397e2 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/RecordMetadata.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/RecordMetadata.java @@ -26,12 +26,18 @@ public final class RecordMetadata { private final long offset; private final TopicPartition topicPartition; - public RecordMetadata(TopicPartition topicPartition, long offset) { + private RecordMetadata(TopicPartition topicPartition, long offset) { super(); this.offset = offset; this.topicPartition = topicPartition; } + public RecordMetadata(TopicPartition topicPartition, long baseOffset, long relativeOffset) { + // ignore the relativeOffset if the base offset is -1, + // since this indicates the offset is unknown + this(topicPartition, baseOffset == -1 ? baseOffset : baseOffset + relativeOffset); + } + /** * The offset of the record in the topic/partition. */ diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/Serializer.java b/clients/src/main/java/org/apache/kafka/clients/producer/Serializer.java new file mode 100644 index 0000000000000..0378683508396 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/producer/Serializer.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package org.apache.kafka.clients.producer; + +import org.apache.kafka.common.Configurable; + +/** + * + * @param Type to be serialized from. + * + * A class that implements this interface is expected to have a constructor with no parameter. 
+ */ +public interface Serializer extends Configurable { + /** + * + * @param topic Topic associated with data + * @param data Typed data + * @param isKey Is data for key or value + * @return bytes of the serialized data + */ + public byte[] serialize(String topic, T data, boolean isKey); + + /** + * Close this serializer + */ + public void close(); +} diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java index 5bed60730ea10..aa91e1444a49c 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java @@ -16,14 +16,19 @@ */ package org.apache.kafka.clients.producer.internals; +import org.apache.kafka.clients.producer.BufferExhaustedException; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.metrics.stats.Rate; +import org.apache.kafka.common.utils.Time; + import java.nio.ByteBuffer; import java.util.ArrayDeque; import java.util.Deque; +import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; -import org.apache.kafka.clients.producer.BufferExhaustedException; - /** * A pool of ByteBuffers kept under a given memory limit. This class is fairly specific to the needs of the producer. In @@ -44,6 +49,9 @@ public final class BufferPool { private final Deque free; private final Deque waiters; private long availableMemory; + private final Metrics metrics; + private final Time time; + private final Sensor waitTime; /** * Create a new buffer pool @@ -54,7 +62,7 @@ public final class BufferPool { * {@link #allocate(int)} call will block and wait for memory to be returned to the pool. If false * {@link #allocate(int)} will throw an exception if the buffer is out of memory. */ - public BufferPool(long memory, int poolableSize, boolean blockOnExhaustion) { + public BufferPool(long memory, int poolableSize, boolean blockOnExhaustion, Metrics metrics, Time time) { this.poolableSize = poolableSize; this.blockOnExhaustion = blockOnExhaustion; this.lock = new ReentrantLock(); @@ -62,15 +70,22 @@ public BufferPool(long memory, int poolableSize, boolean blockOnExhaustion) { this.waiters = new ArrayDeque(); this.totalMemory = memory; this.availableMemory = memory; - } + this.metrics = metrics; + this.time = time; + this.waitTime = this.metrics.sensor("bufferpool-wait-time"); + this.waitTime.add("bufferpool-wait-ratio", + "The fraction of time an appender waits for space allocation.", + new Rate(TimeUnit.NANOSECONDS)); + } /** - * Allocate a buffer of the given size + * Allocate a buffer of the given size. This method blocks if there is not enough memory and the buffer pool + * is configured with blocking mode. 
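The new Serializer interface is small enough to implement directly. A hypothetical StringSerializer, not part of this patch, assuming the interface and Configurable carry the generic parameters this extraction drops:

```java
import java.nio.charset.Charset;
import java.util.Map;

import org.apache.kafka.clients.producer.Serializer;

public class StringSerializer implements Serializer<String> {

    @Override
    public void configure(Map<String, ?> configs) {
        // no configuration needed
    }

    @Override
    public byte[] serialize(String topic, String data, boolean isKey) {
        // null passes through, mirroring ByteArraySerializer
        return data == null ? null : data.getBytes(Charset.forName("UTF-8"));
    }

    @Override
    public void close() {
        // nothing to close
    }
}
```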
* * @param size The buffer size to allocate in bytes * @return The buffer * @throws InterruptedException If the thread is interrupted while blocked - * @throws IllegalArgument if size is larger than the total memory controlled by the pool (and hence we would block + * @throws IllegalArgumentException if size is larger than the total memory controlled by the pool (and hence we would block * forever) * @throws BufferExhaustedException if the pool is in non-blocking mode and size exceeds the free memory in the pool */ @@ -110,7 +125,11 @@ public ByteBuffer allocate(int size) throws InterruptedException { // loop over and over until we have a buffer or have reserved // enough memory to allocate one while (accumulated < size) { + long startWait = time.nanoseconds(); moreMemory.await(); + long endWait = time.nanoseconds(); + this.waitTime.record(endWait - startWait, time.milliseconds()); + // check if we can satisfy this request from the free list, // otherwise allocate memory if (accumulated == 0 && size == this.poolableSize && !this.free.isEmpty()) { @@ -166,28 +185,31 @@ private void freeUp(int size) { * Return buffers to the pool. If they are of the poolable size add them to the free list, otherwise just mark the * memory as free. * - * @param buffers The buffers to return + * @param buffer The buffer to return + * @param size The size of the buffer to mark as deallocated, note that this maybe smaller than buffer.capacity + * since the buffer may re-allocate itself during in-place compression */ - public void deallocate(ByteBuffer... buffers) { + public void deallocate(ByteBuffer buffer, int size) { lock.lock(); try { - for (int i = 0; i < buffers.length; i++) { - int size = buffers[i].capacity(); - if (size == this.poolableSize) { - buffers[i].clear(); - this.free.add(buffers[i]); - } else { - this.availableMemory += size; - } - Condition moreMem = this.waiters.peekFirst(); - if (moreMem != null) - moreMem.signal(); + if (size == this.poolableSize && size == buffer.capacity()) { + buffer.clear(); + this.free.add(buffer); + } else { + this.availableMemory += size; } + Condition moreMem = this.waiters.peekFirst(); + if (moreMem != null) + moreMem.signal(); } finally { lock.unlock(); } } + public void deallocate(ByteBuffer buffer) { + deallocate(buffer, buffer.capacity()); + } + /** * the total free memory both unallocated and in the free list */ diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ErrorLoggingCallback.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ErrorLoggingCallback.java new file mode 100644 index 0000000000000..678d1c6accd2a --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ErrorLoggingCallback.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
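BufferPool is an internal class, but the constructor and deallocate() signature changes above are easiest to see in an allocate/deallocate round trip. A sketch based only on what the diff shows; the Metrics and SystemTime constructions are assumptions, and allocate() throws InterruptedException (handling omitted in this fragment):

```java
Metrics metrics = new Metrics();                      // assumed no-arg constructor
BufferPool pool = new BufferPool(32 * 1024 * 1024L,   // total memory
                                 16 * 1024,           // poolable (batch) size
                                 true,                // block when exhausted
                                 metrics,
                                 new SystemTime());

ByteBuffer buffer = pool.allocate(16 * 1024);         // may block; wait time feeds bufferpool-wait-ratio
try {
    // ... fill the buffer with a record batch ...
} finally {
    pool.deallocate(buffer);                          // poolable-sized buffers go back on the free list
}
```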
See the License for the + * specific language governing permissions and limitations under the License. + */ + +package org.apache.kafka.clients.producer.internals; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ErrorLoggingCallback implements Callback { + private static final Logger log = LoggerFactory.getLogger(ErrorLoggingCallback.class); + private String topic; + private byte[] key; + private byte[] value; + private boolean logAsString; + + public ErrorLoggingCallback(String topic, byte[] key, byte[] value, boolean logAsString) { + this.topic = topic; + this.key = key; + this.value = value; + this.logAsString = logAsString; + } + + public void onCompletion(RecordMetadata metadata, Exception e) { + if (e != null) { + String keyString = (key == null) ? "null" : + logAsString ? new String(key) : key.length + " bytes"; + String valueString = (value == null) ? "null" : + logAsString ? new String(value) : value.length + " bytes"; + log.error("Error when sending message to topic {} with key: {}, value: {} with error: {}", + topic, keyString, valueString, e.getMessage()); + } + } +} diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/FutureRecordMetadata.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/FutureRecordMetadata.java index 22d4c79bc06fb..4a2da41f47994 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/FutureRecordMetadata.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/FutureRecordMetadata.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
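The ErrorLoggingCallback above gives fire-and-forget senders a way to surface failures in the client log without blocking on the returned future. A minimal caller-side sketch, assuming the byte[]-based Producer and ProducerRecord interfaces on this branch; the producer handle and payloads are supplied by the caller.

    import org.apache.kafka.clients.producer.Producer;
    import org.apache.kafka.clients.producer.ProducerRecord;
    import org.apache.kafka.clients.producer.internals.ErrorLoggingCallback;

    public final class ErrorLoggingSketch {
        /** Send without waiting on the future; failures are only reported via the log. */
        public static void sendLoggingErrors(Producer producer, String topic, byte[] key, byte[] value) {
            // logAsString = false logs only key/value sizes rather than their contents
            producer.send(new ProducerRecord(topic, key, value),
                          new ErrorLoggingCallback(topic, key, value, false));
        }
    }
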
*/ package org.apache.kafka.clients.producer.internals; @@ -23,7 +19,6 @@ import org.apache.kafka.clients.producer.RecordMetadata; - /** * The future result of a record send */ @@ -60,7 +55,7 @@ private RecordMetadata valueOrError() throws ExecutionException { if (this.result.error() != null) throw new ExecutionException(this.result.error()); else - return new RecordMetadata(result.topicPartition(), this.result.baseOffset() + this.relativeOffset); + return new RecordMetadata(result.topicPartition(), this.result.baseOffset(), this.relativeOffset); } public long relativeOffset() { diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/Metadata.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/Metadata.java index 52d30a86d0439..1d30f9edd9533 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/Metadata.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/Metadata.java @@ -1,29 +1,24 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.clients.producer.internals; import java.util.HashSet; -import java.util.List; import java.util.Set; import org.apache.kafka.common.Cluster; -import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.errors.TimeoutException; - +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A class encapsulating some of the logic around metadata. 
@@ -35,11 +30,14 @@ */ public final class Metadata { + private static final Logger log = LoggerFactory.getLogger(Metadata.class); + private final long refreshBackoffMs; private final long metadataExpireMs; - private long lastRefresh; + private int version; + private long lastRefreshMs; private Cluster cluster; - private boolean forceUpdate; + private boolean needUpdate; private final Set topics; /** @@ -58,9 +56,10 @@ public Metadata() { public Metadata(long refreshBackoffMs, long metadataExpireMs) { this.refreshBackoffMs = refreshBackoffMs; this.metadataExpireMs = metadataExpireMs; - this.lastRefresh = 0L; + this.lastRefreshMs = 0L; + this.version = 0; this.cluster = Cluster.empty(); - this.forceUpdate = false; + this.needUpdate = false; this.topics = new HashSet(); } @@ -72,49 +71,47 @@ public synchronized Cluster fetch() { } /** - * Fetch cluster metadata including partitions for the given topic. If there is no metadata for the given topic, - * block waiting for an update. - * @param topic The topic we want metadata for - * @param maxWaitMs The maximum amount of time to block waiting for metadata + * Add the topic to maintain in the metadata */ - public synchronized Cluster fetch(String topic, long maxWaitMs) { - List partitions = null; - do { - partitions = cluster.partitionsFor(topic); - if (partitions == null) { - long begin = System.currentTimeMillis(); - topics.add(topic); - forceUpdate = true; - try { - wait(maxWaitMs); - } catch (InterruptedException e) { /* this is fine, just try again */ - } - long ellapsed = System.currentTimeMillis() - begin; - if (ellapsed > maxWaitMs) - throw new TimeoutException("Failed to update metadata after " + maxWaitMs + " ms."); - } else { - return cluster; - } - } while (true); + public synchronized void add(String topic) { + topics.add(topic); } /** - * Does the current cluster info need to be updated? An update is needed if it has been at least refreshBackoffMs - * since our last update and either (1) an update has been requested or (2) the current metadata has expired (more - * than metadataExpireMs has passed since the last refresh) + * The next time to update the cluster info is the maximum of the time the current info will expire + * and the time the current info can be updated (i.e. backoff time has elapsed); If an update has + * been request then the expiry time is now */ - public synchronized boolean needsUpdate(long now) { - long msSinceLastUpdate = now - this.lastRefresh; - boolean updateAllowed = msSinceLastUpdate >= this.refreshBackoffMs; - boolean updateNeeded = this.forceUpdate || msSinceLastUpdate >= this.metadataExpireMs; - return updateAllowed && updateNeeded; + public synchronized long timeToNextUpdate(long nowMs) { + long timeToExpire = needUpdate ? 
0 : Math.max(this.lastRefreshMs + this.metadataExpireMs - nowMs, 0); + long timeToAllowUpdate = this.lastRefreshMs + this.refreshBackoffMs - nowMs; + return Math.max(timeToExpire, timeToAllowUpdate); } /** - * Force an update of the current cluster info + * Request an update of the current cluster metadata info, return the current version before the update */ - public synchronized void forceUpdate() { - this.forceUpdate = true; + public synchronized int requestUpdate() { + this.needUpdate = true; + return this.version; + } + + /** + * Wait for metadata update until the current version is larger than the last version we know of + */ + public synchronized void awaitUpdate(int lastVerison, long maxWaitMs) { + long begin = System.currentTimeMillis(); + long remainingWaitMs = maxWaitMs; + while (this.version <= lastVerison) { + try { + wait(remainingWaitMs); + } catch (InterruptedException e) { /* this is fine */ + } + long elapsed = System.currentTimeMillis() - begin; + if (elapsed >= maxWaitMs) + throw new TimeoutException("Failed to update metadata after " + maxWaitMs + " ms."); + remainingWaitMs = maxWaitMs - elapsed; + } } /** @@ -128,10 +125,25 @@ public synchronized Set topics() { * Update the cluster metadata */ public synchronized void update(Cluster cluster, long now) { - this.forceUpdate = false; - this.lastRefresh = now; + this.needUpdate = false; + this.lastRefreshMs = now; + this.version += 1; this.cluster = cluster; notifyAll(); + log.debug("Updated cluster metadata version {} to {}", this.version, this.cluster); + } + + /** + * The last time metadata was updated. + */ + public synchronized long lastUpdate() { + return this.lastRefreshMs; } + /** + * The metadata refresh backoff in ms + */ + public long refreshBackoff() { + return refreshBackoffMs; + } } diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/Partitioner.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/Partitioner.java index fbb732a575221..483899d2e69b3 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/Partitioner.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/Partitioner.java @@ -41,10 +41,10 @@ public class Partitioner { * Compute the partition for the given record. * * @param record The record being sent - * @param numPartitions The total number of partitions for the given topic + * @param cluster The current cluster metadata */ - public int partition(ProducerRecord record, Cluster cluster) { - List partitions = cluster.partitionsFor(record.topic()); + public int partition(ProducerRecord record, Cluster cluster) { + List partitions = cluster.partitionsForTopic(record.topic()); int numPartitions = partitions.size(); if (record.partition() != null) { // they have given us a partition, use it diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java index be8a4a399b8b4..c15485d1af304 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java @@ -1,32 +1,33 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
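Before the RecordAccumulator changes below, it is worth sketching how a caller is meant to use the new Metadata handshake introduced above: request an update, remember the returned version, and wait until a strictly newer version has been installed. The wakeup hook standing in for the sender I/O thread is an assumption about the surrounding producer code, not part of this patch.

    import org.apache.kafka.clients.producer.internals.Metadata;
    import org.apache.kafka.common.Cluster;

    public final class MetadataSketch {
        /** Block until metadata for the topic has been refreshed (or a TimeoutException is thrown). */
        public static Cluster waitOnMetadata(Metadata metadata, String topic, long maxWaitMs, Runnable wakeup) {
            metadata.add(topic);                      // keep refreshing this topic from now on
            int version = metadata.requestUpdate();   // mark the current metadata stale, note its version
            wakeup.run();                             // assumed: nudge the background sender to fetch
            metadata.awaitUpdate(version, maxWaitMs); // returns once a newer version is installed
            return metadata.fetch();
        }
    }
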
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.clients.producer.internals; import java.nio.ByteBuffer; import java.util.ArrayDeque; import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentMap; import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.common.Cluster; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.metrics.Measurable; import org.apache.kafka.common.metrics.MetricConfig; @@ -38,24 +39,28 @@ import org.apache.kafka.common.utils.CopyOnWriteMap; import org.apache.kafka.common.utils.Time; import org.apache.kafka.common.utils.Utils; - +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** - * This class acts as a queue that accumulates records into {@link org.apache.kafka.common.record.MemoryRecords} instances to be - * sent to the server. + * This class acts as a queue that accumulates records into {@link org.apache.kafka.common.record.MemoryRecords} + * instances to be sent to the server. *

    * The accumulator uses a bounded amount of memory and append calls will block when that memory is exhausted, unless * this behavior is explicitly disabled. */ public final class RecordAccumulator { + private static final Logger log = LoggerFactory.getLogger(RecordAccumulator.class); + private volatile boolean closed; private int drainIndex; private final int batchSize; private final long lingerMs; - private final ConcurrentMap> batches; + private final long retryBackoffMs; private final BufferPool free; private final Time time; + private final ConcurrentMap> batches; /** * Create a new record accumulator @@ -65,39 +70,48 @@ public final class RecordAccumulator { * @param lingerMs An artificial delay time to add before declaring a records instance that isn't full ready for * sending. This allows time for more records to arrive. Setting a non-zero lingerMs will trade off some * latency for potentially better throughput due to more batching (and hence fewer, larger requests). + * @param retryBackoffMs An artificial delay time to retry the produce request upon receiving an error. This avoids + * exhausting all retries in a short period of time. * @param blockOnBufferFull If true block when we are out of memory; if false throw an exception when we are out of * memory * @param metrics The metrics * @param time The time instance to use */ - public RecordAccumulator(int batchSize, long totalSize, long lingerMs, boolean blockOnBufferFull, Metrics metrics, Time time) { + public RecordAccumulator(int batchSize, + long totalSize, + long lingerMs, + long retryBackoffMs, + boolean blockOnBufferFull, + Metrics metrics, + Time time) { this.drainIndex = 0; this.closed = false; this.batchSize = batchSize; this.lingerMs = lingerMs; + this.retryBackoffMs = retryBackoffMs; this.batches = new CopyOnWriteMap>(); - this.free = new BufferPool(totalSize, batchSize, blockOnBufferFull); + this.free = new BufferPool(totalSize, batchSize, blockOnBufferFull, metrics, time); this.time = time; registerMetrics(metrics); } private void registerMetrics(Metrics metrics) { - metrics.addMetric("blocked_threads", + metrics.addMetric("waiting-threads", "The number of user threads blocked waiting for buffer memory to enqueue their records", new Measurable() { public double measure(MetricConfig config, long now) { return free.queued(); } }); - metrics.addMetric("buffer_total_bytes", - "The total amount of buffer memory that is available (not currently used for buffering records).", + metrics.addMetric("buffer-total-bytes", + "The maximum amount of buffer memory the client can use (whether or not it is currently used).", new Measurable() { public double measure(MetricConfig config, long now) { return free.totalMemory(); } }); - metrics.addMetric("buffer_available_bytes", - "The total amount of buffer memory that is available (not currently used for buffering records).", + metrics.addMetric("buffer-available-bytes", + "The total amount of buffer memory that is not being used (either unallocated or in the free list).", new Measurable() { public double measure(MetricConfig config, long now) { return free.availableMemory(); @@ -106,55 +120,75 @@ public double measure(MetricConfig config, long now) { } /** - * Add a record to the accumulator. + * Add a record to the accumulator, return the append result *

    - * This method will block if sufficient memory isn't available for the record unless blocking has been disabled. - * + * The append result will contain the future metadata, and a flag for whether the appended batch is full or a new batch is created + *

    + * * @param tp The topic/partition to which this record is being sent * @param key The key for the record * @param value The value for the record * @param compression The compression codec for the record * @param callback The user-supplied callback to execute when the request is complete */ - public FutureRecordMetadata append(TopicPartition tp, byte[] key, byte[] value, CompressionType compression, Callback callback) throws InterruptedException { + public RecordAppendResult append(TopicPartition tp, byte[] key, byte[] value, CompressionType compression, Callback callback) throws InterruptedException { if (closed) throw new IllegalStateException("Cannot send after the producer is closed."); // check if we have an in-progress batch Deque dq = dequeFor(tp); synchronized (dq) { - RecordBatch batch = dq.peekLast(); - if (batch != null) { - FutureRecordMetadata future = batch.tryAppend(key, value, compression, callback); - if (future != null) - return future; + RecordBatch last = dq.peekLast(); + if (last != null) { + FutureRecordMetadata future = last.tryAppend(key, value, callback); + if (future != null) { + return new RecordAppendResult(future, dq.size() > 1 || last.records.isFull(), false); + } } } // we don't have an in-progress record batch try to allocate a new batch int size = Math.max(this.batchSize, Records.LOG_OVERHEAD + Record.recordSize(key, value)); + log.trace("Allocating a new {} byte message buffer for topic {} partition {}", size, tp.topic(), tp.partition()); ByteBuffer buffer = free.allocate(size); synchronized (dq) { - RecordBatch first = dq.peekLast(); - if (first != null) { - FutureRecordMetadata future = first.tryAppend(key, value, compression, callback); + RecordBatch last = dq.peekLast(); + if (last != null) { + FutureRecordMetadata future = last.tryAppend(key, value, callback); if (future != null) { // Somebody else found us a batch, return the one we waited for! Hopefully this doesn't happen // often... free.deallocate(buffer); - return future; + return new RecordAppendResult(future, dq.size() > 1 || last.records.isFull(), false); } } - RecordBatch batch = new RecordBatch(tp, new MemoryRecords(buffer), time.milliseconds()); - FutureRecordMetadata future = Utils.notNull(batch.tryAppend(key, value, compression, callback)); + MemoryRecords records = MemoryRecords.emptyRecords(buffer, compression, this.batchSize); + RecordBatch batch = new RecordBatch(tp, records, time.milliseconds()); + FutureRecordMetadata future = Utils.notNull(batch.tryAppend(key, value, callback)); + dq.addLast(batch); - return future; + return new RecordAppendResult(future, dq.size() > 1 || batch.records.isFull(), true); } } /** - * Get a list of topic-partitions which are ready to be sent. + * Re-enqueue the given record batch in the accumulator to retry + */ + public void reenqueue(RecordBatch batch, long now) { + batch.attempts++; + batch.lastAttemptMs = now; + Deque deque = dequeFor(batch.topicPartition); + synchronized (deque) { + deque.addFirst(batch); + } + } + + /** + * Get a list of nodes whose partitions are ready to be sent, and the earliest time at which any non-sendable + * partition will be ready; Also return the flag for whether there are any unknown leaders for the accumulated + * partition batches. *

    - * A partition is ready if ANY of the following are true: + * A destination node is ready to send data if ANY one of its partitions is not backing off the send and ANY of the + * following are true: *

      *
    1. The record set is full *
    2. The record set has sat in the accumulator for at least lingerMs milliseconds @@ -163,57 +197,110 @@ public FutureRecordMetadata append(TopicPartition tp, byte[] key, byte[] value, *
    3. The accumulator has been closed *
    */ - public List ready(long now) { - List ready = new ArrayList(); + public ReadyCheckResult ready(Cluster cluster, long nowMs) { + Set readyNodes = new HashSet(); + long nextReadyCheckDelayMs = Long.MAX_VALUE; + boolean unknownLeadersExist = false; + boolean exhausted = this.free.queued() > 0; for (Map.Entry> entry : this.batches.entrySet()) { + TopicPartition part = entry.getKey(); Deque deque = entry.getValue(); - synchronized (deque) { - RecordBatch batch = deque.peekFirst(); - if (batch != null) { - boolean full = deque.size() > 1 || !batch.records.buffer().hasRemaining(); - boolean expired = now - batch.created >= lingerMs; - if (full | expired | exhausted | closed) - ready.add(batch.topicPartition); + + Node leader = cluster.leaderFor(part); + if (leader == null) { + unknownLeadersExist = true; + } else if (!readyNodes.contains(leader)) { + synchronized (deque) { + RecordBatch batch = deque.peekFirst(); + if (batch != null) { + boolean backingOff = batch.attempts > 0 && batch.lastAttemptMs + retryBackoffMs > nowMs; + long waitedTimeMs = nowMs - batch.lastAttemptMs; + long timeToWaitMs = backingOff ? retryBackoffMs : lingerMs; + long timeLeftMs = Math.max(timeToWaitMs - waitedTimeMs, 0); + boolean full = deque.size() > 1 || batch.records.isFull(); + boolean expired = waitedTimeMs >= timeToWaitMs; + boolean sendable = full || expired || exhausted || closed; + if (sendable && !backingOff) { + readyNodes.add(leader); + } + else { + // Note that this results in a conservative estimate since an un-sendable partition may have + // a leader that will later be found to have sendable data. However, this is good enough + // since we'll just wake up and then sleep again for the remaining time. + nextReadyCheckDelayMs = Math.min(timeLeftMs, nextReadyCheckDelayMs); + } + } } } } - return ready; + + return new ReadyCheckResult(readyNodes, nextReadyCheckDelayMs, unknownLeadersExist); + } + + /** + * @return Whether there is any unsent record in the accumulator. + */ + public boolean hasUnsent() { + for (Map.Entry> entry : this.batches.entrySet()) { + Deque deque = entry.getValue(); + synchronized (deque) { + if (deque.size() > 0) + return true; + } + } + return false; } /** - * Drain all the data for the given topic-partitions that will fit within the specified size. This method attempts - * to avoid choosing the same topic-partitions over and over. + * Drain all the data for the given nodes and collate them into a list of batches that will fit within the specified + * size on a per-node basis. This method attempts to avoid choosing the same topic-node over and over. * - * @param partitions The list of partitions to drain + * @param cluster The current cluster metadata + * @param nodes The list of node to drain * @param maxSize The maximum number of bytes to drain - * @return A list of {@link RecordBatch} for partitions specified with total size less than the requested maxSize. + * @param now The current unix time in milliseconds + * @return A list of {@link RecordBatch} for each node specified with total size less than the requested maxSize. 
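Taken together, append/ready/drain give the producer a simple contract: append returns a RecordAppendResult, and a full batch or a freshly created one is the signal to wake the sender. A hedged caller-side sketch follows; the accumulator, sender and target partition are assumed to be wired up elsewhere, and NONE compression is chosen only for illustration.

    import org.apache.kafka.clients.producer.Callback;
    import org.apache.kafka.clients.producer.internals.FutureRecordMetadata;
    import org.apache.kafka.clients.producer.internals.RecordAccumulator;
    import org.apache.kafka.clients.producer.internals.Sender;
    import org.apache.kafka.common.TopicPartition;
    import org.apache.kafka.common.record.CompressionType;

    public final class AppendSketch {
        /** Append one record and wake the sender if there is a batch worth sending. */
        public static FutureRecordMetadata append(RecordAccumulator accumulator, Sender sender,
                                                  TopicPartition tp, byte[] key, byte[] value,
                                                  Callback callback) throws InterruptedException {
            RecordAccumulator.RecordAppendResult result =
                    accumulator.append(tp, key, value, CompressionType.NONE, callback);
            // A full batch, or a new batch queued behind an older one, means the sender has work to do.
            if (result.batchIsFull || result.newBatchCreated)
                sender.wakeup();
            return result.future;
        }
    }
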
* TODO: There may be a starvation issue due to iteration order */ - public List drain(List partitions, int maxSize) { - if (partitions.isEmpty()) - return Collections.emptyList(); - int size = 0; - List ready = new ArrayList(); - /* to make starvation less likely this loop doesn't start at 0 */ - int start = drainIndex = drainIndex % partitions.size(); - do { - TopicPartition tp = partitions.get(drainIndex); - Deque deque = dequeFor(tp); - if (deque != null) { - synchronized (deque) { - if (size + deque.peekFirst().records.sizeInBytes() > maxSize) { - return ready; - } else { - RecordBatch batch = deque.pollFirst(); - size += batch.records.sizeInBytes(); - ready.add(batch); + public Map> drain(Cluster cluster, Set nodes, int maxSize, long now) { + if (nodes.isEmpty()) + return Collections.emptyMap(); + + Map> batches = new HashMap>(); + for (Node node : nodes) { + int size = 0; + List parts = cluster.partitionsForNode(node.id()); + List ready = new ArrayList(); + /* to make starvation less likely this loop doesn't start at 0 */ + int start = drainIndex = drainIndex % parts.size(); + do { + PartitionInfo part = parts.get(drainIndex); + Deque deque = dequeFor(new TopicPartition(part.topic(), part.partition())); + if (deque != null) { + synchronized (deque) { + RecordBatch first = deque.peekFirst(); + if (first != null) { + if (size + first.records.sizeInBytes() > maxSize && !ready.isEmpty()) { + // there is a rare case that a single batch size is larger than the request size due + // to compression; in this case we will still eventually send this batch in a single + // request + break; + } else { + RecordBatch batch = deque.pollFirst(); + batch.records.close(); + size += batch.records.sizeInBytes(); + ready.add(batch); + batch.drainedMs = now; + } + } } } - } - this.drainIndex = (this.drainIndex + 1) % partitions.size(); - } while (start != drainIndex); - return ready; + this.drainIndex = (this.drainIndex + 1) % parts.size(); + } while (start != drainIndex); + batches.put(node.id(), ready); + } + return batches; } /** @@ -229,16 +316,10 @@ private Deque dequeFor(TopicPartition tp) { } /** - * Deallocate the list of record batches + * Deallocate the record batch */ - public void deallocate(Collection batches) { - ByteBuffer[] buffers = new ByteBuffer[batches.size()]; - int i = 0; - for (RecordBatch batch : batches) { - buffers[i] = batch.records.buffer(); - i++; - } - free.deallocate(buffers); + public void deallocate(RecordBatch batch) { + free.deallocate(batch.records.buffer(), batch.records.capacity()); } /** @@ -248,4 +329,28 @@ public void close() { this.closed = true; } + + public final static class RecordAppendResult { + public final FutureRecordMetadata future; + public final boolean batchIsFull; + public final boolean newBatchCreated; + + public RecordAppendResult(FutureRecordMetadata future, boolean batchIsFull, boolean newBatchCreated) { + this.future = future; + this.batchIsFull = batchIsFull; + this.newBatchCreated = newBatchCreated; + } + } + + public final static class ReadyCheckResult { + public final Set readyNodes; + public final long nextReadyCheckDelayMs; + public final boolean unknownLeadersExist; + + public ReadyCheckResult(Set readyNodes, long nextReadyCheckDelayMs, boolean unknownLeadersExist) { + this.readyNodes = readyNodes; + this.nextReadyCheckDelayMs = nextReadyCheckDelayMs; + this.unknownLeadersExist = unknownLeadersExist; + } + } } diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordBatch.java 
b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordBatch.java index 7a440a3dd29c7..dd0af8aee98ab 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordBatch.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordBatch.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.clients.producer.internals; @@ -20,11 +16,11 @@ import java.util.List; import org.apache.kafka.clients.producer.Callback; -import org.apache.kafka.clients.producer.RecordMetadata; import org.apache.kafka.common.TopicPartition; -import org.apache.kafka.common.record.CompressionType; import org.apache.kafka.common.record.MemoryRecords; - +import org.apache.kafka.common.record.Record; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A batch of records that is or will be sent. @@ -32,15 +28,23 @@ * This class is not thread safe and external synchronization must be used when modifying it */ public final class RecordBatch { + + private static final Logger log = LoggerFactory.getLogger(RecordBatch.class); + public int recordCount = 0; - public final long created; + public int maxRecordSize = 0; + public volatile int attempts = 0; + public final long createdMs; + public long drainedMs; + public long lastAttemptMs; public final MemoryRecords records; public final TopicPartition topicPartition; private final ProduceRequestResult produceFuture; private final List thunks; public RecordBatch(TopicPartition tp, MemoryRecords records, long now) { - this.created = now; + this.createdMs = now; + this.lastAttemptMs = now; this.records = records; this.topicPartition = tp; this.produceFuture = new ProduceRequestResult(); @@ -52,14 +56,15 @@ public RecordBatch(TopicPartition tp, MemoryRecords records, long now) { * * @return The RecordSend corresponding to this record or null if there isn't sufficient room. 
*/ - public FutureRecordMetadata tryAppend(byte[] key, byte[] value, CompressionType compression, Callback callback) { + public FutureRecordMetadata tryAppend(byte[] key, byte[] value, Callback callback) { if (!this.records.hasRoomFor(key, value)) { return null; } else { - this.records.append(0L, key, value, compression); + this.records.append(0L, key, value); + this.maxRecordSize = Math.max(this.maxRecordSize, Record.recordSize(key, value)); FutureRecordMetadata future = new FutureRecordMetadata(this.produceFuture, this.recordCount); if (callback != null) - thunks.add(new Thunk(callback, this.recordCount)); + thunks.add(new Thunk(callback, future)); this.recordCount++; return future; } @@ -68,36 +73,44 @@ public FutureRecordMetadata tryAppend(byte[] key, byte[] value, CompressionType /** * Complete the request * - * @param offset The offset - * @param errorCode The error code or 0 if no error + * @param baseOffset The base offset of the messages assigned by the server + * @param exception The exception that occurred (or null if the request was successful) */ - public void done(long offset, RuntimeException exception) { - this.produceFuture.done(topicPartition, offset, exception); + public void done(long baseOffset, RuntimeException exception) { + this.produceFuture.done(topicPartition, baseOffset, exception); + log.trace("Produced messages to topic-partition {} with base offset offset {} and error: {}.", + topicPartition, + baseOffset, + exception); // execute callbacks for (int i = 0; i < this.thunks.size(); i++) { try { Thunk thunk = this.thunks.get(i); if (exception == null) - thunk.callback.onCompletion(new RecordMetadata(topicPartition, this.produceFuture.baseOffset() + thunk.relativeOffset), - null); + thunk.callback.onCompletion(thunk.future.get(), null); else thunk.callback.onCompletion(null, exception); } catch (Exception e) { - e.printStackTrace(); + log.error("Error executing user-provided callback on message for topic-partition {}:", topicPartition, e); } } } /** - * A callback and the associated RecordSend argument to pass to it. + * A callback and the associated FutureRecordMetadata argument to pass to it. */ final private static class Thunk { final Callback callback; - final long relativeOffset; + final FutureRecordMetadata future; - public Thunk(Callback callback, long relativeOffset) { + public Thunk(Callback callback, FutureRecordMetadata future) { this.callback = callback; - this.relativeOffset = relativeOffset; + this.future = future; } } + + @Override + public String toString() { + return "RecordBatch(topicPartition=" + topicPartition + ", recordCount=" + recordCount + ")"; + } } \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/Sender.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/Sender.java index b274e5e4376b6..84a7a07269c51 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/Sender.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/Sender.java @@ -1,49 +1,48 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.clients.producer.internals; -import java.io.IOException; -import java.net.InetSocketAddress; import java.nio.ByteBuffer; -import java.util.ArrayDeque; import java.util.ArrayList; -import java.util.Collections; -import java.util.Deque; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Set; +import org.apache.kafka.clients.ClientRequest; +import org.apache.kafka.clients.ClientResponse; +import org.apache.kafka.clients.KafkaClient; import org.apache.kafka.common.Cluster; import org.apache.kafka.common.Node; import org.apache.kafka.common.TopicPartition; -import org.apache.kafka.common.errors.NetworkException; -import org.apache.kafka.common.network.NetworkReceive; -import org.apache.kafka.common.network.NetworkSend; -import org.apache.kafka.common.network.Selectable; +import org.apache.kafka.common.errors.InvalidMetadataException; +import org.apache.kafka.common.errors.RetriableException; +import org.apache.kafka.common.metrics.Measurable; +import org.apache.kafka.common.metrics.MetricConfig; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.metrics.stats.Avg; +import org.apache.kafka.common.metrics.stats.Max; +import org.apache.kafka.common.metrics.stats.Rate; import org.apache.kafka.common.protocol.ApiKeys; import org.apache.kafka.common.protocol.Errors; -import org.apache.kafka.common.protocol.ProtoUtils; -import org.apache.kafka.common.protocol.types.Struct; -import org.apache.kafka.common.requests.RequestHeader; +import org.apache.kafka.common.requests.ProduceRequest; +import org.apache.kafka.common.requests.ProduceResponse; import org.apache.kafka.common.requests.RequestSend; -import org.apache.kafka.common.requests.ResponseHeader; import org.apache.kafka.common.utils.Time; - +import org.apache.kafka.common.utils.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * The background thread that handles the sending of produce requests to the Kafka cluster. 
This thread makes metadata @@ -51,134 +50,143 @@ */ public class Sender implements Runnable { - private final Map nodeState; + private static final Logger log = LoggerFactory.getLogger(Sender.class); + + /* the state of each nodes connection */ + private final KafkaClient client; + + /* the record accumulator that batches records */ private final RecordAccumulator accumulator; - private final Selectable selector; - private final String clientId; + + /* the metadata for the client */ + private final Metadata metadata; + + /* the maximum request size to attempt to send to the server */ private final int maxRequestSize; - private final long reconnectBackoffMs; + + /* the number of acknowledgements to request from the server */ private final short acks; + + /* the max time in ms for the server to wait for acknowlegements */ private final int requestTimeout; - private final InFlightRequests inFlightRequests; - private final Metadata metadata; + + /* the number of times to retry a failed request before giving up */ + private final int retries; + + /* the clock instance used for getting the time */ private final Time time; - private int correlation; - private boolean metadataFetchInProgress; + + /* true while the sender thread is still running */ private volatile boolean running; - public Sender(Selectable selector, + /* metrics */ + private final SenderMetrics sensors; + + public Sender(KafkaClient client, Metadata metadata, RecordAccumulator accumulator, - String clientId, int maxRequestSize, - long reconnectBackoffMs, short acks, + int retries, int requestTimeout, + Metrics metrics, Time time) { - this.nodeState = new HashMap(); + this.client = client; this.accumulator = accumulator; - this.selector = selector; - this.maxRequestSize = maxRequestSize; - this.reconnectBackoffMs = reconnectBackoffMs; this.metadata = metadata; - this.clientId = clientId; + this.maxRequestSize = maxRequestSize; this.running = true; this.requestTimeout = requestTimeout; this.acks = acks; - this.inFlightRequests = new InFlightRequests(); - this.correlation = 0; - this.metadataFetchInProgress = false; + this.retries = retries; this.time = time; + this.sensors = new SenderMetrics(metrics); } /** * The main run loop for the sender thread */ public void run() { + log.debug("Starting Kafka producer I/O thread."); + // main loop, runs until close is called while (running) { try { run(time.milliseconds()); } catch (Exception e) { - e.printStackTrace(); + log.error("Uncaught error in kafka producer I/O thread: ", e); } } - // send anything left in the accumulator - int unsent = 0; - do { + log.debug("Beginning shutdown of Kafka producer I/O thread, sending remaining records."); + + // okay we stopped accepting requests but there may still be + // requests in the accumulator or waiting for acknowledgment, + // wait until these are completed. 
+ while (this.accumulator.hasUnsent() || this.client.inFlightRequestCount() > 0) { try { - unsent = run(time.milliseconds()); + run(time.milliseconds()); } catch (Exception e) { - e.printStackTrace(); + log.error("Uncaught error in kafka producer I/O thread: ", e); } - } while (unsent > 0); + } + + this.client.close(); - // close all the connections - this.selector.close(); + log.debug("Shutdown of Kafka producer I/O thread has completed."); } /** * Run a single iteration of sending * - * @param now The current time - * @return The total number of topic/partitions that had data ready (regardless of what we actually sent) + * @param now The current POSIX time in milliseconds */ - public int run(long now) { + public void run(long now) { Cluster cluster = metadata.fetch(); // get the list of partitions with data ready to send - List ready = this.accumulator.ready(now); - - // prune the list of ready topics to eliminate any that we aren't ready to send yet - List sendable = processReadyPartitions(cluster, ready, now); - - // should we update our metadata? - List sends = new ArrayList(sendable.size()); - InFlightRequest metadataReq = maybeMetadataRequest(cluster, now); - if (metadataReq != null) { - sends.add(metadataReq.request); - this.inFlightRequests.add(metadataReq); + RecordAccumulator.ReadyCheckResult result = this.accumulator.ready(cluster, now); + + // if there are any partitions whose leaders are not known yet, force metadata update + if (result.unknownLeadersExist) + this.metadata.requestUpdate(); + + // remove any nodes we aren't ready to send to + Iterator iter = result.readyNodes.iterator(); + long notReadyTimeout = Long.MAX_VALUE; + while (iter.hasNext()) { + Node node = iter.next(); + if (!this.client.ready(node, now)) { + iter.remove(); + notReadyTimeout = Math.min(notReadyTimeout, this.client.connectionDelay(node, now)); + } } // create produce requests - List batches = this.accumulator.drain(sendable, this.maxRequestSize); - List requests = collate(cluster, batches); - for (int i = 0; i < requests.size(); i++) { - InFlightRequest request = requests.get(i); - this.inFlightRequests.add(request); - sends.add(request.request); - } - - // do the I/O - try { - this.selector.poll(5L, sends); - } catch (IOException e) { - e.printStackTrace(); + Map> batches = this.accumulator.drain(cluster, result.readyNodes, this.maxRequestSize, now); + List requests = createProduceRequests(batches, now); + sensors.updateProduceRequestMetrics(requests); + + // If we have any nodes that are ready to send + have sendable data, poll with 0 timeout so this can immediately + // loop and try sending more data. Otherwise, the timeout is determined by nodes that have partitions with data + // that isn't yet sendable (e.g. lingering, backing off). Note that this specifically does not include nodes + // with sendable data that aren't ready to send since they would cause busy looping. 
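For context on how this run loop is driven, a hedged wiring sketch: the sender is a Runnable that a producer would start on a dedicated daemon I/O thread and stop via initiateClose(), letting run() drain the accumulator and in-flight requests before exiting. The thread name below is illustrative only.

    import org.apache.kafka.clients.producer.internals.Sender;

    public final class SenderWiringSketch {
        /** Start the sender loop on a daemon I/O thread. */
        public static Thread start(Sender sender) {
            Thread ioThread = new Thread(sender, "kafka-producer-network-thread"); // name is an assumption
            ioThread.setDaemon(true);
            ioThread.start();
            return ioThread;
        }

        /** Orderly shutdown: stop accepting work, let queued records flush, then join. */
        public static void shutdown(Sender sender, Thread ioThread) throws InterruptedException {
            sender.initiateClose();
            ioThread.join();
        }
    }
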
+ long pollTimeout = Math.min(result.nextReadyCheckDelayMs, notReadyTimeout); + if (result.readyNodes.size() > 0) { + log.trace("Nodes with data ready to send: {}", result.readyNodes); + log.trace("Created {} produce requests: {}", requests.size(), requests); + pollTimeout = 0; } - // handle responses, connections, and disconnections - handleSends(this.selector.completedSends()); - handleResponses(this.selector.completedReceives(), now); - handleDisconnects(this.selector.disconnected()); - handleConnects(this.selector.connected()); - - return ready.size(); - } - - private InFlightRequest maybeMetadataRequest(Cluster cluster, long now) { - if (this.metadataFetchInProgress || !metadata.needsUpdate(now)) - return null; - Node node = cluster.nextNode(); - NodeState state = nodeState.get(node.id()); - if (state == null || (state.state == ConnectionState.DISCONNECTED && now - state.lastConnectAttempt > this.reconnectBackoffMs)) { - // we don't have a connection to this node right now, make one - initiateConnect(node, now); - return null; - } else if (state.state == ConnectionState.CONNECTED) { - this.metadataFetchInProgress = true; - return metadataRequest(node.id(), metadata.topics()); - } else { - return null; + // if some partitions are already ready to be sent, the select time would be 0; + // otherwise if some partition already has some data accumulated but not ready yet, + // the select time will be the time difference between now and its linger expiry time; + // otherwise the select time will be the time difference between now and the metadata expiry time; + List responses = this.client.poll(requests, pollTimeout, now); + for (ClientResponse response : responses) { + if (response.wasDisconnected()) + handleDisconnect(response, now); + else + handleResponse(response, now); } } @@ -188,332 +196,274 @@ private InFlightRequest maybeMetadataRequest(Cluster cluster, long now) { public void initiateClose() { this.running = false; this.accumulator.close(); + this.wakeup(); } - /** - * Process the set of topic-partitions with data ready to send. If we have a connection to the appropriate node, add - * it to the returned set. 
For any partitions we have no connection to either make one, fetch the appropriate - * metdata to be able to do so - */ - private List processReadyPartitions(Cluster cluster, List ready, long now) { - List sendable = new ArrayList(ready.size()); - for (TopicPartition tp : ready) { - Node node = cluster.leaderFor(tp); - if (node == null) { - // we don't know about this topic/partition or it has no leader, re-fetch metadata - metadata.forceUpdate(); - } else { - NodeState state = nodeState.get(node.id()); - // TODO: encapsulate this logic somehow - if (state == null || (state.state == ConnectionState.DISCONNECTED && now - state.lastConnectAttempt > this.reconnectBackoffMs)) { - // we don't have a connection to this node right now, make one - initiateConnect(node, now); - } else if (state.state == ConnectionState.CONNECTED && inFlightRequests.canSendMore(node.id())) { - sendable.add(tp); - } - } - } - return sendable; - } - - /** - * Initiate a connection to the given node - */ - private void initiateConnect(Node node, long now) { - try { - selector.connect(node.id(), new InetSocketAddress(node.host(), node.port()), 64 * 1024 * 1024, 64 * 1024 * 1024); // TODO - // socket - // buffers - nodeState.put(node.id(), new NodeState(ConnectionState.CONNECTING, now)); - } catch (IOException e) { - /* attempt failed, we'll try again after the backoff */ - nodeState.put(node.id(), new NodeState(ConnectionState.DISCONNECTED, now)); - /* maybe the problem is our metadata, update it */ - metadata.forceUpdate(); - } - } - - /** - * Handle any closed connections - */ - private void handleDisconnects(List disconnects) { - for (int node : disconnects) { - for (InFlightRequest request : this.inFlightRequests.clearAll(node)) { - if (request.batches != null) { - for (RecordBatch batch : request.batches.values()) - batch.done(-1L, new NetworkException("The server disconnected unexpectedly without sending a response.")); - this.accumulator.deallocate(request.batches.values()); - } - NodeState state = this.nodeState.get(request.request.destination()); - if (state != null) - state.state = ConnectionState.DISCONNECTED; - } - } - } - - /** - * Record any connections that completed in our node state - */ - private void handleConnects(List connects) { - for (Integer id : connects) - this.nodeState.get(id).state = ConnectionState.CONNECTED; - } - - /** - * Process completed sends - */ - public void handleSends(List sends) { - /* if acks = 0 then the request is satisfied once sent */ - for (NetworkSend send : sends) { - Deque requests = this.inFlightRequests.requestQueue(send.destination()); - InFlightRequest request = requests.peekFirst(); - if (!request.expectResponse) { - requests.pollFirst(); - if (request.request.header().apiKey() == ApiKeys.PRODUCE.id) { - for (RecordBatch batch : request.batches.values()) - batch.done(-1L, Errors.NONE.exception()); - this.accumulator.deallocate(request.batches.values()); - } - } - } - } - - /** - * Handle responses from the server - */ - private void handleResponses(List receives, long now) { - for (NetworkReceive receive : receives) { - int source = receive.source(); - InFlightRequest req = inFlightRequests.nextCompleted(source); - ResponseHeader header = ResponseHeader.parse(receive.payload()); - short apiKey = req.request.header().apiKey(); - Struct body = (Struct) ProtoUtils.currentResponseSchema(apiKey).read(receive.payload()); - correlate(req.request.header(), header); - if (req.request.header().apiKey() == ApiKeys.PRODUCE.id) - handleProduceResponse(req, body); - else if 
(req.request.header().apiKey() == ApiKeys.METADATA.id) - handleMetadataResponse(body, now); - else - throw new IllegalStateException("Unexpected response type: " + req.request.header().apiKey()); - } - } - - private void handleMetadataResponse(Struct body, long now) { - this.metadataFetchInProgress = false; - Cluster cluster = ProtoUtils.parseMetadataResponse(body); - this.metadata.update(cluster, now); + private void handleDisconnect(ClientResponse response, long now) { + log.trace("Cancelled request {} due to node {} being disconnected", response, response.request().request().destination()); + int correlation = response.request().request().header().correlationId(); + @SuppressWarnings("unchecked") + Map responseBatches = (Map) response.request().attachment(); + for (RecordBatch batch : responseBatches.values()) + completeBatch(batch, Errors.NETWORK_EXCEPTION, -1L, correlation, now); } /** * Handle a produce response */ - private void handleProduceResponse(InFlightRequest request, Struct response) { - for (Object topicResponse : (Object[]) response.get("responses")) { - Struct topicRespStruct = (Struct) topicResponse; - String topic = (String) topicRespStruct.get("topic"); - for (Object partResponse : (Object[]) topicRespStruct.get("partition_responses")) { - Struct partRespStruct = (Struct) partResponse; - int partition = (Integer) partRespStruct.get("partition"); - short errorCode = (Short) partRespStruct.get("error_code"); - long offset = (Long) partRespStruct.get("base_offset"); - RecordBatch batch = request.batches.get(new TopicPartition(topic, partition)); - batch.done(offset, Errors.forCode(errorCode).exception()); + private void handleResponse(ClientResponse response, long now) { + int correlationId = response.request().request().header().correlationId(); + log.trace("Received produce response from node {} with correlation id {}", + response.request().request().destination(), + correlationId); + @SuppressWarnings("unchecked") + Map batches = (Map) response.request().attachment(); + // if we have a response, parse it + if (response.hasResponse()) { + ProduceResponse produceResponse = new ProduceResponse(response.responseBody()); + for (Map.Entry entry : produceResponse.responses().entrySet()) { + TopicPartition tp = entry.getKey(); + ProduceResponse.PartitionResponse partResp = entry.getValue(); + Errors error = Errors.forCode(partResp.errorCode); + RecordBatch batch = batches.get(tp); + completeBatch(batch, error, partResp.baseOffset, correlationId, now); } + this.sensors.recordLatency(response.request().request().destination(), response.requestLatencyMs()); + } else { + // this is the acks = 0 case, just complete all requests + for (RecordBatch batch : batches.values()) + completeBatch(batch, Errors.NONE, -1L, correlationId, now); } - this.accumulator.deallocate(request.batches.values()); } /** - * Validate that the response corresponds to the request we expect or else explode + * Complete or retry the given batch of records. 
+ * @param batch The record batch + * @param error The error (or null if none) + * @param baseOffset The base offset assigned to the records if successful + * @param correlationId The correlation id for the request + * @param now The current POSIX time stamp in milliseconds */ - private void correlate(RequestHeader requestHeader, ResponseHeader responseHeader) { - if (requestHeader.correlationId() != responseHeader.correlationId()) - throw new IllegalStateException("Correlation id for response (" + responseHeader.correlationId() - + ") does not match request (" - + requestHeader.correlationId() - + ")"); + private void completeBatch(RecordBatch batch, Errors error, long baseOffset, long correlationId, long now) { + if (error != Errors.NONE && canRetry(batch, error)) { + // retry + log.warn("Got error produce response with correlation id {} on topic-partition {}, retrying ({} attempts left). Error: {}", + correlationId, + batch.topicPartition, + this.retries - batch.attempts - 1, + error); + this.accumulator.reenqueue(batch, now); + this.sensors.recordRetries(batch.topicPartition.topic(), batch.recordCount); + } else { + // tell the user the result of their request + batch.done(baseOffset, error.exception()); + this.accumulator.deallocate(batch); + if (error != Errors.NONE) + this.sensors.recordErrors(batch.topicPartition.topic(), batch.recordCount); + } + if (error.exception() instanceof InvalidMetadataException) + metadata.requestUpdate(); } /** - * Create a metadata request for the given topics + * We can retry a send if the error is transient and the number of attempts taken is fewer than the maximum allowed */ - private InFlightRequest metadataRequest(int node, Set topics) { - String[] ts = new String[topics.size()]; - topics.toArray(ts); - Struct body = new Struct(ProtoUtils.currentRequestSchema(ApiKeys.METADATA.id)); - body.set("topics", topics.toArray()); - RequestSend send = new RequestSend(node, new RequestHeader(ApiKeys.METADATA.id, clientId, correlation++), body); - return new InFlightRequest(true, send, null); + private boolean canRetry(RecordBatch batch, Errors error) { + return batch.attempts < this.retries && error.exception() instanceof RetriableException; } /** - * Collate the record batches into a list of produce requests on a per-node basis + * Transfer the record batches into a list of produce requests on a per-node basis */ - private List collate(Cluster cluster, List batches) { - Map> collated = new HashMap>(); - for (RecordBatch batch : batches) { - Node node = cluster.leaderFor(batch.topicPartition); - List found = collated.get(node.id()); - if (found == null) { - found = new ArrayList(); - collated.put(node.id(), found); - } - found.add(batch); - } - List requests = new ArrayList(collated.size()); + private List createProduceRequests(Map> collated, long now) { + List requests = new ArrayList(collated.size()); for (Map.Entry> entry : collated.entrySet()) - requests.add(produceRequest(entry.getKey(), acks, requestTimeout, entry.getValue())); + requests.add(produceRequest(now, entry.getKey(), acks, requestTimeout, entry.getValue())); return requests; } /** * Create a produce request from the given record batches */ - private InFlightRequest produceRequest(int destination, short acks, int timeout, List batches) { - Map batchesByPartition = new HashMap(); - Map> batchesByTopic = new HashMap>(); + private ClientRequest produceRequest(long now, int destination, short acks, int timeout, List batches) { + Map produceRecordsByPartition = new HashMap(batches.size()); + Map 
recordsByPartition = new HashMap(batches.size()); for (RecordBatch batch : batches) { - batchesByPartition.put(batch.topicPartition, batch); - List found = batchesByTopic.get(batch.topicPartition.topic()); - if (found == null) { - found = new ArrayList(); - batchesByTopic.put(batch.topicPartition.topic(), found); - } - found.add(batch); - } - Struct produce = new Struct(ProtoUtils.currentRequestSchema(ApiKeys.PRODUCE.id)); - produce.set("acks", acks); - produce.set("timeout", timeout); - List topicDatas = new ArrayList(batchesByTopic.size()); - for (Map.Entry> entry : batchesByTopic.entrySet()) { - Struct topicData = produce.instance("topic_data"); - topicData.set("topic", entry.getKey()); - List parts = entry.getValue(); - Object[] partitionData = new Object[parts.size()]; - for (int i = 0; i < parts.size(); i++) { - ByteBuffer buffer = parts.get(i).records.buffer(); - buffer.flip(); - Struct part = topicData.instance("data") - .set("partition", parts.get(i).topicPartition.partition()) - .set("record_set", buffer); - partitionData[i] = part; - } - topicData.set("data", partitionData); - topicDatas.add(topicData); + TopicPartition tp = batch.topicPartition; + ByteBuffer recordsBuffer = batch.records.buffer(); + recordsBuffer.flip(); + produceRecordsByPartition.put(tp, recordsBuffer); + recordsByPartition.put(tp, batch); } - produce.set("topic_data", topicDatas.toArray()); - - RequestHeader header = new RequestHeader(ApiKeys.PRODUCE.id, clientId, correlation++); - RequestSend send = new RequestSend(destination, header, produce); - return new InFlightRequest(acks != 0, send, batchesByPartition); + ProduceRequest request = new ProduceRequest(acks, timeout, produceRecordsByPartition); + RequestSend send = new RequestSend(destination, this.client.nextRequestHeader(ApiKeys.PRODUCE), request.toStruct()); + return new ClientRequest(now, acks != 0, send, recordsByPartition); } /** * Wake up the selector associated with this send thread */ public void wakeup() { - this.selector.wakeup(); - } - - /** - * The states of a node connection - */ - private static enum ConnectionState { - DISCONNECTED, CONNECTING, CONNECTED + this.client.wakeup(); } /** - * The state of a node + * A collection of sensors for the sender */ - private static final class NodeState { - private ConnectionState state; - private long lastConnectAttempt; - - public NodeState(ConnectionState state, long lastConnectAttempt) { - this.state = state; - this.lastConnectAttempt = lastConnectAttempt; - } - - public String toString() { - return "NodeState(" + state + ", " + lastConnectAttempt + ")"; - } - } - - /** - * An request that hasn't been fully processed yet - */ - private static final class InFlightRequest { - public boolean expectResponse; - public Map batches; - public RequestSend request; - - /** - * @param expectResponse Should we expect a response message or is this request complete once it is sent? 
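The produce request built above carries the per-partition batch map as the ClientRequest attachment, which is how handleResponse and handleDisconnect later recover exactly the batches they need to complete, with no separate bookkeeping table. A schematic of that attachment pattern is sketched below using stand-in classes; PendingRequest and the string "batches" are illustrative only and not the real client types.

    import java.util.HashMap;
    import java.util.Map;

    // Stand-in for a request object that carries opaque per-request context ("attachment")
    class PendingRequest {
        private final Object attachment;
        PendingRequest(Object attachment) { this.attachment = attachment; }
        Object attachment() { return attachment; }
    }

    public class AttachmentPatternSketch {
        public static void main(String[] args) {
            // context captured at send time (the real Sender stores record batches keyed by partition)
            Map<String, String> batchesByPartition = new HashMap<String, String>();
            batchesByPartition.put("topic-0", "batch for partition 0");

            PendingRequest request = new PendingRequest(batchesByPartition);

            // later, when the response (or a disconnect) arrives, the handler casts the attachment back
            @SuppressWarnings("unchecked")
            Map<String, String> toComplete = (Map<String, String>) request.attachment();
            for (Map.Entry<String, String> e : toComplete.entrySet())
                System.out.println("completing " + e.getValue() + " for " + e.getKey());
        }
    }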
- * @param request The request - * @param batches The record batches contained in the request if it is a produce request - */ - public InFlightRequest(boolean expectResponse, RequestSend request, Map batches) { - this.batches = batches; - this.request = request; - this.expectResponse = expectResponse; + private class SenderMetrics { + + private final Metrics metrics; + public final Sensor retrySensor; + public final Sensor errorSensor; + public final Sensor queueTimeSensor; + public final Sensor requestTimeSensor; + public final Sensor recordsPerRequestSensor; + public final Sensor batchSizeSensor; + public final Sensor compressionRateSensor; + public final Sensor maxRecordSizeSensor; + + public SenderMetrics(Metrics metrics) { + this.metrics = metrics; + + this.batchSizeSensor = metrics.sensor("batch-size"); + this.batchSizeSensor.add("batch-size-avg", "The average number of bytes sent per partition per-request.", new Avg()); + this.batchSizeSensor.add("batch-size-max", "The max number of bytes sent per partition per-request.", new Max()); + + this.compressionRateSensor = metrics.sensor("compression-rate"); + this.compressionRateSensor.add("compression-rate-avg", "The average compression rate of record batches.", new Avg()); + + this.queueTimeSensor = metrics.sensor("queue-time"); + this.queueTimeSensor.add("record-queue-time-avg", + "The average time in ms record batches spent in the record accumulator.", + new Avg()); + this.queueTimeSensor.add("record-queue-time-max", + "The maximum time in ms record batches spent in the record accumulator.", + new Max()); + + this.requestTimeSensor = metrics.sensor("request-time"); + this.requestTimeSensor.add("request-latency-avg", "The average request latency in ms", new Avg()); + this.requestTimeSensor.add("request-latency-max", "The maximum request latency in ms", new Max()); + + this.recordsPerRequestSensor = metrics.sensor("records-per-request"); + this.recordsPerRequestSensor.add("record-send-rate", "The average number of records sent per second.", new Rate()); + this.recordsPerRequestSensor.add("records-per-request-avg", "The average number of records per request.", new Avg()); + + this.retrySensor = metrics.sensor("record-retries"); + this.retrySensor.add("record-retry-rate", "The average per-second number of retried record sends", new Rate()); + + this.errorSensor = metrics.sensor("errors"); + this.errorSensor.add("record-error-rate", "The average per-second number of record sends that resulted in errors", new Rate()); + + this.maxRecordSizeSensor = metrics.sensor("record-size-max"); + this.maxRecordSizeSensor.add("record-size-max", "The maximum record size", new Max()); + this.maxRecordSizeSensor.add("record-size-avg", "The average record size", new Avg()); + + this.metrics.addMetric("requests-in-flight", "The current number of in-flight requests awaiting a response.", new Measurable() { + public double measure(MetricConfig config, long now) { + return client.inFlightRequestCount(); + } + }); + metrics.addMetric("metadata-age", "The age in seconds of the current producer metadata being used.", new Measurable() { + public double measure(MetricConfig config, long now) { + return (now - metadata.lastUpdate()) / 1000.0; + } + }); } - } - /** - * A set of outstanding request queues for each node that have not yet received responses - */ - private static final class InFlightRequests { - private final Map> requests = new HashMap>(); - - /** - * Add the given request to the queue for the node it was directed to - */ - public void 
add(InFlightRequest request) { - Deque reqs = this.requests.get(request.request.destination()); - if (reqs == null) { - reqs = new ArrayDeque(); - this.requests.put(request.request.destination(), reqs); + public void maybeRegisterTopicMetrics(String topic) { + // if one sensor of the metrics has been registered for the topic, + // then all other sensors should have been registered; and vice versa + String topicRecordsCountName = "topic." + topic + ".records-per-batch"; + Sensor topicRecordCount = this.metrics.getSensor(topicRecordsCountName); + if (topicRecordCount == null) { + topicRecordCount = this.metrics.sensor(topicRecordsCountName); + topicRecordCount.add("topic." + topic + ".record-send-rate", new Rate()); + + String topicByteRateName = "topic." + topic + ".bytes"; + Sensor topicByteRate = this.metrics.sensor(topicByteRateName); + topicByteRate.add("topic." + topic + ".byte-rate", new Rate()); + + String topicCompressionRateName = "topic." + topic + ".compression-rate"; + Sensor topicCompressionRate = this.metrics.sensor(topicCompressionRateName); + topicCompressionRate.add("topic." + topic + ".compression-rate", new Avg()); + + String topicRetryName = "topic." + topic + ".record-retries"; + Sensor topicRetrySensor = this.metrics.sensor(topicRetryName); + topicRetrySensor.add("topic." + topic + ".record-retry-rate", new Rate()); + + String topicErrorName = "topic." + topic + ".record-errors"; + Sensor topicErrorSensor = this.metrics.sensor(topicErrorName); + topicErrorSensor.add("topic." + topic + ".record-error-rate", new Rate()); } - reqs.addFirst(request); } - public Deque requestQueue(int node) { - Deque reqs = requests.get(node); - if (reqs == null || reqs.isEmpty()) - throw new IllegalStateException("Response from server for which there are no in-flight requests."); - return reqs; + public void updateProduceRequestMetrics(List requests) { + long now = time.milliseconds(); + for (int i = 0; i < requests.size(); i++) { + ClientRequest request = requests.get(i); + int records = 0; + + if (request.attachment() != null) { + Map responseBatches = (Map) request.attachment(); + for (RecordBatch batch : responseBatches.values()) { + + // register all per-topic metrics at once + String topic = batch.topicPartition.topic(); + maybeRegisterTopicMetrics(topic); + + // per-topic record send rate + String topicRecordsCountName = "topic." + topic + ".records-per-batch"; + Sensor topicRecordCount = Utils.notNull(this.metrics.getSensor(topicRecordsCountName)); + topicRecordCount.record(batch.recordCount); + + // per-topic bytes send rate + String topicByteRateName = "topic." + topic + ".bytes"; + Sensor topicByteRate = Utils.notNull(this.metrics.getSensor(topicByteRateName)); + topicByteRate.record(batch.records.sizeInBytes()); + + // per-topic compression rate + String topicCompressionRateName = "topic." 
+ topic + ".compression-rate"; + Sensor topicCompressionRate = Utils.notNull(this.metrics.getSensor(topicCompressionRateName)); + topicCompressionRate.record(batch.records.compressionRate()); + + // global metrics + this.batchSizeSensor.record(batch.records.sizeInBytes(), now); + this.queueTimeSensor.record(batch.drainedMs - batch.createdMs, now); + this.compressionRateSensor.record(batch.records.compressionRate()); + this.maxRecordSizeSensor.record(batch.maxRecordSize, now); + records += batch.recordCount; + } + this.recordsPerRequestSensor.record(records, now); + } + } } - /** - * Get the oldest request (the one that that will be completed next) for the given node - */ - public InFlightRequest nextCompleted(int node) { - return requestQueue(node).pollLast(); + public void recordRetries(String topic, int count) { + long now = time.milliseconds(); + this.retrySensor.record(count, now); + String topicRetryName = "topic." + topic + ".record-retries"; + Sensor topicRetrySensor = this.metrics.getSensor(topicRetryName); + if (topicRetrySensor != null) + topicRetrySensor.record(count, now); } - /** - * Can we send more requests to this node? - * - * @param node Node in question - * @return true iff we have no requests still being sent to the given node - */ - public boolean canSendMore(int node) { - Deque queue = requests.get(node); - return queue == null || queue.isEmpty() || queue.peekFirst().request.complete(); + public void recordErrors(String topic, int count) { + long now = time.milliseconds(); + this.errorSensor.record(count, now); + String topicErrorName = "topic." + topic + ".record-errors"; + Sensor topicErrorSensor = this.metrics.getSensor(topicErrorName); + if (topicErrorSensor != null) + topicErrorSensor.record(count, now); } - /** - * Clear out all the in-flight requests for the given node and return them - * - * @param node The node - * @return All the in-flight requests for that node that have been removed - */ - public Iterable clearAll(int node) { - Deque reqs = requests.get(node); - if (reqs == null) { - return Collections.emptyList(); - } else { - return requests.remove(node); + public void recordLatency(int node, long latency) { + long now = time.milliseconds(); + this.requestTimeSensor.record(latency, now); + if (node >= 0) { + String nodeTimeName = "node-" + node + ".latency"; + Sensor nodeRequestTime = this.metrics.getSensor(nodeTimeName); + if (nodeRequestTime != null) + nodeRequestTime.record(latency, now); } } } diff --git a/clients/src/main/java/org/apache/kafka/clients/tools/ProducerPerformance.java b/clients/src/main/java/org/apache/kafka/clients/tools/ProducerPerformance.java index 108d61e6dba6d..1b828007975ef 100644 --- a/clients/src/main/java/org/apache/kafka/clients/tools/ProducerPerformance.java +++ b/clients/src/main/java/org/apache/kafka/clients/tools/ProducerPerformance.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.clients.tools; @@ -21,62 +17,204 @@ import org.apache.kafka.clients.producer.Callback; import org.apache.kafka.clients.producer.KafkaProducer; -import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.clients.producer.ProducerRecord; import org.apache.kafka.clients.producer.RecordMetadata; -import org.apache.kafka.common.record.Records; - public class ProducerPerformance { + private static final long NS_PER_MS = 1000000L; + private static final long NS_PER_SEC = 1000 * NS_PER_MS; + private static final long MIN_SLEEP_NS = 2 * NS_PER_MS; + public static void main(String[] args) throws Exception { - if (args.length != 3) { - System.err.println("USAGE: java " + ProducerPerformance.class.getName() + " url num_records record_size"); + if (args.length < 4) { + System.err.println("USAGE: java " + ProducerPerformance.class.getName() + + " topic_name num_records record_size target_records_sec [prop_name=prop_value]*"); System.exit(1); } - String url = args[0]; - int numRecords = Integer.parseInt(args[1]); + + /* parse args */ + String topicName = args[0]; + long numRecords = Long.parseLong(args[1]); int recordSize = Integer.parseInt(args[2]); + int throughput = Integer.parseInt(args[3]); + Properties props = new Properties(); - props.setProperty(ProducerConfig.REQUIRED_ACKS_CONFIG, "1"); - props.setProperty(ProducerConfig.BROKER_LIST_CONFIG, url); - props.setProperty(ProducerConfig.METADATA_FETCH_TIMEOUT_CONFIG, Integer.toString(5 * 1000)); - props.setProperty(ProducerConfig.REQUEST_TIMEOUT_CONFIG, Integer.toString(Integer.MAX_VALUE)); - - KafkaProducer producer = new KafkaProducer(props); - Callback callback = new Callback() { - public void onCompletion(RecordMetadata metadata, Exception e) { - if (e != null) - e.printStackTrace(); - } - }; + for (int i = 4; i < args.length; i++) { + String[] pieces = args[i].split("="); + if (pieces.length != 2) + throw new IllegalArgumentException("Invalid property: " + args[i]); + props.put(pieces[0], pieces[1]); + } + KafkaProducer producer = new KafkaProducer(props); + + /* setup perf test */ byte[] payload = new byte[recordSize]; Arrays.fill(payload, (byte) 1); - ProducerRecord record = new ProducerRecord("test", payload); + ProducerRecord record = new ProducerRecord(topicName, payload); + long sleepTime = NS_PER_SEC / 
throughput; + long sleepDeficitNs = 0; + Stats stats = new Stats(numRecords, 5000); long start = System.currentTimeMillis(); - long maxLatency = -1L; - long totalLatency = 0; - int reportingInterval = 1000000; for (int i = 0; i < numRecords; i++) { long sendStart = System.currentTimeMillis(); - producer.send(record, callback); - long sendEllapsed = System.currentTimeMillis() - sendStart; - maxLatency = Math.max(maxLatency, sendEllapsed); - totalLatency += sendEllapsed; - if (i % reportingInterval == 0) { - System.out.printf("%d max latency = %d ms, avg latency = %.5f\n", - i, - maxLatency, - (totalLatency / (double) reportingInterval)); - totalLatency = 0L; - maxLatency = -1L; + Callback cb = stats.nextCompletion(sendStart, payload.length, stats); + producer.send(record, cb); + + /* + * Maybe sleep a little to control throughput. Sleep time can be a bit inaccurate for times < 1 ms so + * instead of sleeping each time instead wait until a minimum sleep time accumulates (the "sleep deficit") + * and then make up the whole deficit in one longer sleep. + */ + if (throughput > 0) { + float elapsed = (sendStart - start)/1000.f; + if (elapsed > 0 && i/elapsed > throughput) { + sleepDeficitNs += sleepTime; + if (sleepDeficitNs >= MIN_SLEEP_NS) { + long sleepMs = sleepDeficitNs / 1000000; + long sleepNs = sleepDeficitNs - sleepMs * 1000000; + Thread.sleep(sleepMs, (int) sleepNs); + sleepDeficitNs = 0; + } + } } } - long ellapsed = System.currentTimeMillis() - start; - double msgsSec = 1000.0 * numRecords / (double) ellapsed; - double mbSec = msgsSec * (recordSize + Records.LOG_OVERHEAD) / (1024.0 * 1024.0); - System.out.printf("%d records sent in %d ms ms. %.2f records per second (%.2f mb/sec).", numRecords, ellapsed, msgsSec, mbSec); + + /* print final results */ producer.close(); + stats.printTotal(); + } + + private static class Stats { + private long start; + private long windowStart; + private int[] latencies; + private int sampling; + private int iteration; + private int index; + private long count; + private long bytes; + private int maxLatency; + private long totalLatency; + private long windowCount; + private int windowMaxLatency; + private long windowTotalLatency; + private long windowBytes; + private long reportingInterval; + + public Stats(long numRecords, int reportingInterval) { + this.start = System.currentTimeMillis(); + this.windowStart = System.currentTimeMillis(); + this.index = 0; + this.iteration = 0; + this.sampling = (int) (numRecords / Math.min(numRecords, 500000)); + this.latencies = new int[(int) (numRecords / this.sampling) + 1]; + this.index = 0; + this.maxLatency = 0; + this.totalLatency = 0; + this.windowCount = 0; + this.windowMaxLatency = 0; + this.windowTotalLatency = 0; + this.windowBytes = 0; + this.totalLatency = 0; + this.reportingInterval = reportingInterval; + } + + public void record(int iter, int latency, int bytes, long time) { + this.count++; + this.bytes += bytes; + this.totalLatency += latency; + this.maxLatency = Math.max(this.maxLatency, latency); + this.windowCount++; + this.windowBytes += bytes; + this.windowTotalLatency += latency; + this.windowMaxLatency = Math.max(windowMaxLatency, latency); + if (iter % this.sampling == 0) { + this.latencies[index] = latency; + this.index++; + } + /* maybe report the recent perf */ + if (time - windowStart >= reportingInterval) { + printWindow(); + newWindow(); + } + } + + public Callback nextCompletion(long start, int bytes, Stats stats) { + Callback cb = new PerfCallback(this.iteration, start, bytes, stats); + 
this.iteration++; + return cb; + } + + public void printWindow() { + long ellapsed = System.currentTimeMillis() - windowStart; + double recsPerSec = 1000.0 * windowCount / (double) ellapsed; + double mbPerSec = 1000.0 * this.windowBytes / (double) ellapsed / (1024.0 * 1024.0); + System.out.printf("%d records sent, %.1f records/sec (%.2f MB/sec), %.1f ms avg latency, %.1f max latency.\n", + windowCount, + recsPerSec, + mbPerSec, + windowTotalLatency / (double) windowCount, + (double) windowMaxLatency); + } + + public void newWindow() { + this.windowStart = System.currentTimeMillis(); + this.windowCount = 0; + this.windowMaxLatency = 0; + this.windowTotalLatency = 0; + this.windowBytes = 0; + } + + public void printTotal() { + long ellapsed = System.currentTimeMillis() - start; + double recsPerSec = 1000.0 * count / (double) ellapsed; + double mbPerSec = 1000.0 * this.bytes / (double) ellapsed / (1024.0 * 1024.0); + int[] percs = percentiles(this.latencies, index, 0.5, 0.95, 0.99, 0.999); + System.out.printf("%d records sent, %f records/sec (%.2f MB/sec), %.2f ms avg latency, %.2f ms max latency, %d ms 50th, %d ms 95th, %d ms 99th, %d ms 99.9th.\n", + count, + recsPerSec, + mbPerSec, + totalLatency / (double) count, + (double) maxLatency, + percs[0], + percs[1], + percs[2], + percs[3]); + } + + private static int[] percentiles(int[] latencies, int count, double... percentiles) { + int size = Math.min(count, latencies.length); + Arrays.sort(latencies, 0, size); + int[] values = new int[percentiles.length]; + for (int i = 0; i < percentiles.length; i++) { + int index = (int) (percentiles[i] * size); + values[i] = latencies[index]; + } + return values; + } + } + + private static final class PerfCallback implements Callback { + private final long start; + private final int iteration; + private final int bytes; + private final Stats stats; + + public PerfCallback(int iter, long start, int bytes, Stats stats) { + this.start = start; + this.stats = stats; + this.iteration = iter; + this.bytes = bytes; + } + + public void onCompletion(RecordMetadata metadata, Exception exception) { + long now = System.currentTimeMillis(); + int latency = (int) (now - start); + this.stats.record(iteration, latency, bytes, now); + if (exception != null) + exception.printStackTrace(); + } } } diff --git a/clients/src/main/java/org/apache/kafka/common/Cluster.java b/clients/src/main/java/org/apache/kafka/common/Cluster.java index c17a8f8162db9..d3299b944062d 100644 --- a/clients/src/main/java/org/apache/kafka/common/Cluster.java +++ b/clients/src/main/java/org/apache/kafka/common/Cluster.java @@ -1,42 +1,31 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. 
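The sleep-deficit throttling described in the main loop above is worth a worked example: at a target of 100,000 records/sec the per-record allowance is NS_PER_SEC / 100000 = 10,000 ns, so roughly 200 sends accumulate before the 2 ms minimum is reached and the loop sleeps once for about 2 ms instead of 200 times for 10 microseconds each. A condensed standalone sketch of the same technique, with the throttled work simulated:

    public class SleepDeficitThrottle {
        private static final long NS_PER_MS = 1000000L;
        private static final long NS_PER_SEC = 1000 * NS_PER_MS;
        private static final long MIN_SLEEP_NS = 2 * NS_PER_MS;

        public static void main(String[] args) throws InterruptedException {
            int targetPerSec = 100000;                      // target throughput
            long perEventSleepNs = NS_PER_SEC / targetPerSec;
            long sleepDeficitNs = 0;
            long start = System.currentTimeMillis();

            for (int i = 0; i < 1000000; i++) {
                // a simulated unit of work stands in for a send here
                float elapsedSec = (System.currentTimeMillis() - start) / 1000.0f;
                if (elapsedSec > 0 && i / elapsedSec > targetPerSec) {
                    // accumulate tiny per-event sleeps until the deficit is large enough to sleep accurately
                    sleepDeficitNs += perEventSleepNs;
                    if (sleepDeficitNs >= MIN_SLEEP_NS) {
                        long ms = sleepDeficitNs / NS_PER_MS;
                        int ns = (int) (sleepDeficitNs - ms * NS_PER_MS);
                        Thread.sleep(ms, ns);
                        sleepDeficitNs = 0;
                    }
                }
            }
        }
    }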
See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common; -import java.net.InetSocketAddress; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; - import org.apache.kafka.common.utils.Utils; +import java.net.InetSocketAddress; +import java.util.*; /** * A representation of a subset of the nodes, topics, and partitions in the Kafka cluster. */ public final class Cluster { - private final AtomicInteger counter = new AtomicInteger(0); private final List nodes; private final Map partitionsByTopicPartition; private final Map> partitionsByTopic; + private final Map> partitionsByNode; /** * Create a new cluster with the given nodes and partitions @@ -54,18 +43,32 @@ public Cluster(Collection nodes, Collection partitions) { for (PartitionInfo p : partitions) this.partitionsByTopicPartition.put(new TopicPartition(p.topic(), p.partition()), p); - // index the partitions by topic and make the lists unmodifiable so we can handle them out in - // user-facing apis without risk of the client modifying the contents - HashMap> parts = new HashMap>(); + // index the partitions by topic and node respectively, and make the lists + // unmodifiable so we can hand them out in user-facing apis without risk + // of the client modifying the contents + HashMap> partsForTopic = new HashMap>(); + HashMap> partsForNode = new HashMap>(); + for (Node n : this.nodes) { + partsForNode.put(n.id(), new ArrayList()); + } for (PartitionInfo p : partitions) { - if (!parts.containsKey(p.topic())) - parts.put(p.topic(), new ArrayList()); - List ps = parts.get(p.topic()); - ps.add(p); + if (!partsForTopic.containsKey(p.topic())) + partsForTopic.put(p.topic(), new ArrayList()); + List psTopic = partsForTopic.get(p.topic()); + psTopic.add(p); + + if (p.leader() != null) { + List psNode = Utils.notNull(partsForNode.get(p.leader().id())); + psNode.add(p); + } } - this.partitionsByTopic = new HashMap>(parts.size()); - for (Map.Entry> entry : parts.entrySet()) + this.partitionsByTopic = new HashMap>(partsForTopic.size()); + for (Map.Entry> entry : partsForTopic.entrySet()) this.partitionsByTopic.put(entry.getKey(), Collections.unmodifiableList(entry.getValue())); + this.partitionsByNode = new HashMap>(partsForNode.size()); + for (Map.Entry> entry : partsForNode.entrySet()) + this.partitionsByNode.put(entry.getKey(), Collections.unmodifiableList(entry.getValue())); + } /** @@ -82,9 +85,9 @@ public static Cluster empty() { */ public static Cluster bootstrap(List addresses) { List nodes = new ArrayList(); - int nodeId = Integer.MIN_VALUE; + int nodeId = -1; for (InetSocketAddress address : addresses) - nodes.add(new Node(nodeId++, address.getHostName(), address.getPort())); + nodes.add(new Node(nodeId--, address.getHostName(), 
address.getPort())); return new Cluster(nodes, new ArrayList(0)); } @@ -122,19 +125,30 @@ public PartitionInfo partition(TopicPartition topicPartition) { * @param topic The topic name * @return A list of partitions */ - public List partitionsFor(String topic) { + public List partitionsForTopic(String topic) { return this.partitionsByTopic.get(topic); } /** - * Round-robin over the nodes in this cluster + * Get the list of partitions whose leader is this node + * @param nodeId The node id + * @return A list of partitions + */ + public List partitionsForNode(int nodeId) { + return this.partitionsByNode.get(nodeId); + } + + /** + * Get all topics. + * @return a set of all topics */ - public Node nextNode() { - int size = nodes.size(); - if (size == 0) - throw new IllegalStateException("No known nodes."); - int idx = Utils.abs(counter.getAndIncrement()) % size; - return this.nodes.get(idx); + public Set topics() { + return this.partitionsByTopic.keySet(); + } + + @Override + public String toString() { + return "Cluster(nodes = " + this.nodes + ", partitions = " + this.partitionsByTopicPartition.values() + ")"; } } diff --git a/clients/src/main/java/org/apache/kafka/common/Node.java b/clients/src/main/java/org/apache/kafka/common/Node.java index 4197e5098c655..0e47ff3ff0e05 100644 --- a/clients/src/main/java/org/apache/kafka/common/Node.java +++ b/clients/src/main/java/org/apache/kafka/common/Node.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common; @@ -86,7 +82,7 @@ public boolean equals(Object obj) { @Override public String toString() { - return "Node(" + id + ", " + host + ", " + port + ")"; + return "Node(" + (id < 0 ? 
"" : id + ", ") + host + ", " + port + ")"; } } diff --git a/clients/src/main/java/org/apache/kafka/common/PartitionInfo.java b/clients/src/main/java/org/apache/kafka/common/PartitionInfo.java index 08d66f1a71fc5..b15aa2c3ef2d7 100644 --- a/clients/src/main/java/org/apache/kafka/common/PartitionInfo.java +++ b/clients/src/main/java/org/apache/kafka/common/PartitionInfo.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common; @@ -71,4 +67,29 @@ public Node[] inSyncReplicas() { return inSyncReplicas; } + @Override + public String toString() { + return String.format("Partition(topic = %s, partition = %d, leader = %d, replicas = %s, isr = %s", + topic, + partition, + leader.id(), + fmtNodeIds(replicas), + fmtNodeIds(inSyncReplicas)); + } + + /* Extract the node ids from each item in the array and format for display */ + private String fmtNodeIds(Node[] nodes) { + StringBuilder b = new StringBuilder("["); + for (int i = 0; i < nodes.length - 1; i++) { + b.append(Integer.toString(nodes[i].id())); + b.append(','); + } + if (nodes.length > 0) { + b.append(Integer.toString(nodes[nodes.length - 1].id())); + b.append(','); + } + b.append("]"); + return b.toString(); + } + } diff --git a/clients/src/main/java/org/apache/kafka/common/config/AbstractConfig.java b/clients/src/main/java/org/apache/kafka/common/config/AbstractConfig.java index c3148e5a9061d..3d4ab7228926f 100644 --- a/clients/src/main/java/org/apache/kafka/common/config/AbstractConfig.java +++ b/clients/src/main/java/org/apache/kafka/common/config/AbstractConfig.java @@ -1,21 +1,18 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.config; +import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -25,7 +22,8 @@ import org.apache.kafka.common.Configurable; import org.apache.kafka.common.KafkaException; import org.apache.kafka.common.utils.Utils; - +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A convenient base class for configurations to extend. @@ -34,10 +32,17 @@ */ public class AbstractConfig { + private final Logger log = LoggerFactory.getLogger(getClass()); + + /* configs for which values have been requested, used to detect unused configs */ private final Set used; - private final Map values; + + /* the original values passed in by the user */ private final Map originals; + /* the parsed values */ + private final Map values; + @SuppressWarnings("unchecked") public AbstractConfig(ConfigDef definition, Map originals) { /* check that all the keys are really strings */ @@ -47,6 +52,7 @@ public AbstractConfig(ConfigDef definition, Map originals) { this.originals = (Map) originals; this.values = definition.parse(this.originals); this.used = Collections.synchronizedSet(new HashSet()); + logAll(); } protected Object get(String key) { @@ -60,10 +66,14 @@ public int getInt(String key) { return (Integer) get(key); } - public Long getLong(String key) { + public long getLong(String key) { return (Long) get(key); } + public double getDouble(String key) { + return (Double) get(key); + } + @SuppressWarnings("unchecked") public List getList(String key) { return (List) get(key); @@ -83,10 +93,33 @@ public Class getClass(String key) { public Set unused() { Set keys = new HashSet(originals.keySet()); - keys.remove(used); + keys.removeAll(used); return keys; } + private void logAll() { + StringBuilder b = new StringBuilder(); + b.append(getClass().getSimpleName()); + b.append(" values: "); + b.append(Utils.NL); + for (Map.Entry entry : this.values.entrySet()) { + b.append('\t'); + b.append(entry.getKey()); + b.append(" = "); + b.append(entry.getValue()); + b.append(Utils.NL); + } + log.info(b.toString()); + } + + /** + * Log warnings for any unused configurations + */ + public void logUnused() { + for (String key : unused()) + log.warn("The configuration {} = {} was supplied but isn't a known config.", key, this.values.get(key)); + } + /** * 
Get a configured instance of the give class specified by the given configuration key. If the object implements * Configurable configure it using the configuration. @@ -107,4 +140,26 @@ public T getConfiguredInstance(String key, Class t) { return t.cast(o); } + public List getConfiguredInstances(String key, Class t) { + List klasses = getList(key); + List objects = new ArrayList(); + for (String klass : klasses) { + Class c; + try { + c = Class.forName(klass); + } catch (ClassNotFoundException e) { + throw new ConfigException(key, klass, "Class " + klass + " could not be found."); + } + if (c == null) + return null; + Object o = Utils.newInstance(c); + if (!t.isInstance(o)) + throw new KafkaException(c.getName() + " is not an instance of " + t.getName()); + if (o instanceof Configurable) + ((Configurable) o).configure(this.originals); + objects.add(t.cast(o)); + } + return objects; + } + } diff --git a/clients/src/main/java/org/apache/kafka/common/config/ConfigDef.java b/clients/src/main/java/org/apache/kafka/common/config/ConfigDef.java index 61257d1ac6ee3..98cb79b701918 100644 --- a/clients/src/main/java/org/apache/kafka/common/config/ConfigDef.java +++ b/clients/src/main/java/org/apache/kafka/common/config/ConfigDef.java @@ -1,25 +1,25 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
*/ package org.apache.kafka.common.config; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; /** * This class is used for specifying the set of expected configurations, their type, their defaults, their @@ -45,24 +45,33 @@ */ public class ConfigDef { - private static final Object NO_DEFAULT_VALUE = new Object(); + private static final Object NO_DEFAULT_VALUE = new String(""); private final Map configKeys = new HashMap(); + /** + * Returns unmodifiable set of properties names defined in this {@linkplain ConfigDef} + * @return new unmodifiable {@link Set} instance containing the keys + */ + public Set names() { + return Collections.unmodifiableSet(configKeys.keySet()); + } + /** * Define a new configuration * @param name The name of the config parameter * @param type The type of the config * @param defaultValue The default value to use if this config isn't present * @param validator A validator to use in checking the correctness of the config + * @param importance The importance of this config: is this something you will likely need to change. * @param documentation The documentation string for the config * @return This ConfigDef so you can chain calls */ - public ConfigDef define(String name, Type type, Object defaultValue, Validator validator, String documentation) { + public ConfigDef define(String name, Type type, Object defaultValue, Validator validator, Importance importance, String documentation) { if (configKeys.containsKey(name)) throw new ConfigException("Configuration " + name + " is defined twice."); Object parsedDefault = defaultValue == NO_DEFAULT_VALUE ? NO_DEFAULT_VALUE : parseType(name, defaultValue, type); - configKeys.put(name, new ConfigKey(name, type, parsedDefault, validator, documentation)); + configKeys.put(name, new ConfigKey(name, type, parsedDefault, validator, importance, documentation)); return this; } @@ -71,11 +80,12 @@ public ConfigDef define(String name, Type type, Object defaultValue, Validator v * @param name The name of the config parameter * @param type The type of the config * @param defaultValue The default value to use if this config isn't present + * @param importance The importance of this config: is this something you will likely need to change. * @param documentation The documentation string for the config * @return This ConfigDef so you can chain calls */ - public ConfigDef define(String name, Type type, Object defaultValue, String documentation) { - return define(name, type, defaultValue, null, documentation); + public ConfigDef define(String name, Type type, Object defaultValue, Importance importance, String documentation) { + return define(name, type, defaultValue, null, importance, documentation); } /** @@ -83,22 +93,24 @@ public ConfigDef define(String name, Type type, Object defaultValue, String docu * @param name The name of the config parameter * @param type The type of the config * @param validator A validator to use in checking the correctness of the config + * @param importance The importance of this config: is this something you will likely need to change. 
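To see how the Importance parameter and the validators added in this file fit together with AbstractConfig, here is a hedged usage sketch; the config names, defaults, and the ExampleConfig subclass are invented for illustration and only the define(...) signatures shown in this patch are relied on.

    import java.util.Map;
    import java.util.Properties;

    import org.apache.kafka.common.config.AbstractConfig;
    import org.apache.kafka.common.config.ConfigDef;
    import org.apache.kafka.common.config.ConfigDef.Importance;
    import org.apache.kafka.common.config.ConfigDef.Range;
    import org.apache.kafka.common.config.ConfigDef.Type;

    public class ExampleConfig extends AbstractConfig {

        // invented keys, purely for illustration
        private static final ConfigDef DEFINITION = new ConfigDef()
            .define("example.host", Type.STRING, Importance.HIGH, "The host to talk to.")
            .define("example.retries", Type.INT, 3, Range.atLeast(0), Importance.MEDIUM, "How many times to retry.");

        public ExampleConfig(Map<?, ?> originals) {
            super(DEFINITION, originals);
        }

        public static void main(String[] args) {
            Properties props = new Properties();
            props.put("example.host", "localhost");
            props.put("example.typo", "ignored");                  // not defined, so it shows up as unused
            ExampleConfig config = new ExampleConfig(props);
            System.out.println(config.getInt("example.retries"));  // falls back to the default, 3
            config.logUnused();                                    // warns about "example.typo"
        }
    }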
* @param documentation The documentation string for the config * @return This ConfigDef so you can chain calls */ - public ConfigDef define(String name, Type type, Validator validator, String documentation) { - return define(name, type, NO_DEFAULT_VALUE, validator, documentation); + public ConfigDef define(String name, Type type, Validator validator, Importance importance, String documentation) { + return define(name, type, NO_DEFAULT_VALUE, validator, importance, documentation); } /** * Define a required parameter with no default value and no special validation logic * @param name The name of the config parameter * @param type The type of the config + * @param importance The importance of this config: is this something you will likely need to change. * @param documentation The documentation string for the config * @return This ConfigDef so you can chain calls */ - public ConfigDef define(String name, Type type, String documentation) { - return define(name, type, NO_DEFAULT_VALUE, null, documentation); + public ConfigDef define(String name, Type type, Importance importance, String documentation) { + return define(name, type, NO_DEFAULT_VALUE, null, importance, documentation); } /** @@ -121,6 +133,8 @@ else if (key.defaultValue == NO_DEFAULT_VALUE) throw new ConfigException("Missing required configuration \"" + key.name + "\" which has no default value."); else value = key.defaultValue; + if (key.validator != null) + key.validator.ensureValid(key.name, value); values.put(key.name, value); } return values; @@ -140,8 +154,14 @@ private Object parseType(String name, Object value, Type type) { trimmed = ((String) value).trim(); switch (type) { case BOOLEAN: - if (value instanceof String) - return Boolean.parseBoolean(trimmed); + if (value instanceof String) { + if (trimmed.equalsIgnoreCase("true")) + return true; + else if (trimmed.equalsIgnoreCase("false")) + return false; + else + throw new ConfigException(name, value, "Expected value to be either true or false"); + } else if (value instanceof Boolean) return value; else @@ -179,9 +199,12 @@ else if (value instanceof String) if (value instanceof List) return (List) value; else if (value instanceof String) - return Arrays.asList(trimmed.split("\\s*,\\s*", -1)); + if (trimmed.isEmpty()) + return Collections.emptyList(); + else + return Arrays.asList(trimmed.split("\\s*,\\s*", -1)); else - throw new ConfigException(name, value, "Expected a comma seperated list."); + throw new ConfigException(name, value, "Expected a comma separated list."); case CLASS: if (value instanceof Class) return (Class) value; @@ -206,6 +229,10 @@ public enum Type { BOOLEAN, STRING, INT, LONG, DOUBLE, LIST, CLASS; } + public enum Importance { + HIGH, MEDIUM, LOW + } + /** * Validation logic the user may provide */ @@ -230,7 +257,7 @@ private Range(Number min, Number max) { * @param min The minimum acceptable value */ public static Range atLeast(Number min) { - return new Range(min, Double.MAX_VALUE); + return new Range(min, null); } /** @@ -242,28 +269,139 @@ public static Range between(Number min, Number max) { public void ensureValid(String name, Object o) { Number n = (Number) o; - if (n.doubleValue() < min.doubleValue() || n.doubleValue() > max.doubleValue()) - throw new ConfigException(name, o, "Value must be in the range [" + min + ", " + max + "]"); + if (min != null && n.doubleValue() < min.doubleValue()) + throw new ConfigException(name, o, "Value must be at least " + min); + if (max != null && n.doubleValue() > max.doubleValue()) + throw new 
ConfigException(name, o, "Value must be no more than " + max); + } + + public String toString() { + if (min == null) + return "[...," + max + "]"; + else if (max == null) + return "[" + min + ",...]"; + else + return "[" + min + ",...," + max + "]"; } } + public static class ValidString implements Validator { + List validStrings; + + private ValidString(List validStrings) { + this.validStrings = validStrings; + } + + public static ValidString in(List validStrings) { + return new ValidString(validStrings); + } + + @Override + public void ensureValid(String name, Object o) { + + String s = (String) o; + + if (!validStrings.contains(s)) { + throw new ConfigException(name,o,"String must be one of:" +join(validStrings)); + } + + } + + public String toString() { + return "[" + join(validStrings) + "]"; + } + + private String join(List list) + { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (String item : list) + { + if (first) + first = false; + else + sb.append(","); + sb.append(item); + } + return sb.toString(); + } + } + private static class ConfigKey { public final String name; public final Type type; public final String documentation; public final Object defaultValue; public final Validator validator; + public final Importance importance; - public ConfigKey(String name, Type type, Object defaultValue, Validator validator, String documentation) { + public ConfigKey(String name, Type type, Object defaultValue, Validator validator, Importance importance, String documentation) { super(); this.name = name; this.type = type; this.defaultValue = defaultValue; this.validator = validator; + this.importance = importance; if (this.validator != null) this.validator.ensureValid(name, defaultValue); this.documentation = documentation; } + public boolean hasDefault() { + return this.defaultValue != NO_DEFAULT_VALUE; + } + + } + + public String toHtmlTable() { + // sort first required fields, then by importance, then name + List configs = new ArrayList(this.configKeys.values()); + Collections.sort(configs, new Comparator() { + public int compare(ConfigDef.ConfigKey k1, ConfigDef.ConfigKey k2) { + // first take anything with no default value + if (!k1.hasDefault() && k2.hasDefault()) + return -1; + else if (!k2.hasDefault() && k1.hasDefault()) + return 1; + + // then sort by importance + int cmp = k1.importance.compareTo(k2.importance); + if (cmp == 0) + // then sort in alphabetical order + return k1.name.compareTo(k2.name); + else + return cmp; + } + }); + StringBuilder b = new StringBuilder(); + b.append("\n"); + b.append("\n"); + b.append("\n"); + b.append("\n"); + b.append("\n"); + b.append("\n"); + b.append("\n"); + b.append("\n"); + for (ConfigKey def : configs) { + b.append("\n"); + b.append(""); + b.append(""); + b.append(""); + b.append(""); + b.append(""); + b.append("\n"); + } + b.append("
<th>Name</th>\n"); + b.append("<th>Type</th>\n"); + b.append("<th>Default</th>\n"); + b.append("<th>Importance</th>\n"); + b.append("<th>Description</th>\n"); + b.append("</tr>\n
"); + b.append("<td>"); + b.append(def.name); + b.append("</td><td>"); + b.append(def.type.toString().toLowerCase()); + b.append("</td><td>"); + b.append(def.defaultValue == null ? "" : def.defaultValue); + b.append("</td><td>"); + b.append(def.importance.toString().toLowerCase()); + b.append("</td><td>"); + b.append(def.documentation); + b.append("</td></tr>\n"); + b.append("</table>
    "); + return b.toString(); } } diff --git a/clients/src/main/java/org/apache/kafka/common/errors/CorruptRecordException.java b/clients/src/main/java/org/apache/kafka/common/errors/CorruptRecordException.java index 673f61d6271c5..eaccf276dbfb3 100644 --- a/clients/src/main/java/org/apache/kafka/common/errors/CorruptRecordException.java +++ b/clients/src/main/java/org/apache/kafka/common/errors/CorruptRecordException.java @@ -1,22 +1,22 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.errors; -public class CorruptRecordException extends ApiException { +/** + * This exception indicates a record has failed it's internal CRC check, this generally indicates network or disk + * corruption. + */ +public class CorruptRecordException extends RetriableException { private static final long serialVersionUID = 1L; diff --git a/clients/src/main/java/org/apache/kafka/common/errors/DeserializationException.java b/clients/src/main/java/org/apache/kafka/common/errors/DeserializationException.java new file mode 100644 index 0000000000000..a5433398fb978 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/errors/DeserializationException.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ + +package org.apache.kafka.common.errors; + +import org.apache.kafka.common.KafkaException; + +/** + * Any exception during deserialization in the consumer + */ +public class DeserializationException extends KafkaException { + + private static final long serialVersionUID = 1L; + + public DeserializationException(String message, Throwable cause) { + super(message, cause); + } + + public DeserializationException(String message) { + super(message); + } + + public DeserializationException(Throwable cause) { + super(cause); + } + + public DeserializationException() { + super(); + } + + /* avoid the expensive and useless stack trace for deserialization exceptions */ + @Override + public Throwable fillInStackTrace() { + return this; + } + +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/common/errors/InvalidMetadataException.java b/clients/src/main/java/org/apache/kafka/common/errors/InvalidMetadataException.java new file mode 100644 index 0000000000000..8841badb2d783 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/errors/InvalidMetadataException.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package org.apache.kafka.common.errors; + +/** + * An exception that may indicate the client's metadata is out of date + */ +public abstract class InvalidMetadataException extends RetriableException { + + private static final long serialVersionUID = 1L; + + public InvalidMetadataException() { + super(); + } + + public InvalidMetadataException(String message) { + super(message); + } + + public InvalidMetadataException(String message, Throwable cause) { + super(message, cause); + } + + public InvalidMetadataException(Throwable cause) { + super(cause); + } + +} diff --git a/clients/src/main/java/org/apache/kafka/common/errors/InvalidTopicException.java b/clients/src/main/java/org/apache/kafka/common/errors/InvalidTopicException.java new file mode 100644 index 0000000000000..1d90b592d1641 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/errors/InvalidTopicException.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
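The fillInStackTrace override in DeserializationException above is a common trick for exceptions that are thrown often and whose stack trace carries no useful information: skipping trace capture makes construction far cheaper. A minimal illustration of the pattern on a hypothetical exception type (not one from this patch):

    /* Hypothetical example: an exception used for expected failures, made cheap to construct. */
    public class CheapException extends RuntimeException {

        private static final long serialVersionUID = 1L;

        public CheapException(String message) {
            super(message);
        }

        /* Skip the expensive stack trace capture; the message alone is enough for callers. */
        @Override
        public Throwable fillInStackTrace() {
            return this;
        }
    }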
See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.common.errors; + +/** + * The client has attempted to perform an operation on an invalid topic. + */ +public class InvalidTopicException extends ApiException { + + private static final long serialVersionUID = 1L; + + public InvalidTopicException() { + super(); + } + + public InvalidTopicException(String message, Throwable cause) { + super(message, cause); + } + + public InvalidTopicException(String message) { + super(message); + } + + public InvalidTopicException(Throwable cause) { + super(cause); + } + +} diff --git a/clients/src/main/java/org/apache/kafka/common/errors/LeaderNotAvailableException.java b/clients/src/main/java/org/apache/kafka/common/errors/LeaderNotAvailableException.java index 0bde6b5a351fc..9d7ebd47a8439 100644 --- a/clients/src/main/java/org/apache/kafka/common/errors/LeaderNotAvailableException.java +++ b/clients/src/main/java/org/apache/kafka/common/errors/LeaderNotAvailableException.java @@ -1,35 +1,27 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.errors; -public class LeaderNotAvailableException extends RetryableException { +/** + * There is no currently available leader for the given partition (either because a leadership election is in progress + * or because all replicas are down). 
+ */ +public class LeaderNotAvailableException extends InvalidMetadataException { private static final long serialVersionUID = 1L; - public LeaderNotAvailableException(String message, Throwable cause) { - super(message, cause); - } - public LeaderNotAvailableException(String message) { super(message); } - public LeaderNotAvailableException(Throwable cause) { - super(cause); - } - } diff --git a/clients/src/main/java/org/apache/kafka/common/errors/NetworkException.java b/clients/src/main/java/org/apache/kafka/common/errors/NetworkException.java index 3a041593d76cf..f0baa983f081e 100644 --- a/clients/src/main/java/org/apache/kafka/common/errors/NetworkException.java +++ b/clients/src/main/java/org/apache/kafka/common/errors/NetworkException.java @@ -1,22 +1,22 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.errors; -public class NetworkException extends ApiException { +/** + * A misc. network-related IOException occurred when making a request. This could be because the client's metadata is + * out of date and it is making a request to a node that is now dead. + */ +public class NetworkException extends InvalidMetadataException { private static final long serialVersionUID = 1L; diff --git a/clients/src/main/java/org/apache/kafka/common/errors/NotEnoughReplicasAfterAppendException.java b/clients/src/main/java/org/apache/kafka/common/errors/NotEnoughReplicasAfterAppendException.java new file mode 100644 index 0000000000000..75c80a97e4308 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/errors/NotEnoughReplicasAfterAppendException.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
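LeaderNotAvailableException, NotLeaderForPartitionException and NetworkException now share the InvalidMetadataException base class, so a caller can treat all three uniformly as "my view of the cluster is stale". A minimal sketch of that handling (MetadataRefresher is a hypothetical hook; the exception types are from this patch):

import org.apache.kafka.common.errors.InvalidMetadataException;
import org.apache.kafka.common.errors.RetriableException;

public class StaleMetadataSketch {

    /* hypothetical hook: ask the client to re-fetch cluster metadata */
    interface MetadataRefresher {
        void requestUpdate();
    }

    static void onRetriableError(RetriableException e, MetadataRefresher metadata) {
        if (e instanceof InvalidMetadataException) {
            // leader moved, leader election in progress, or we talked to a dead node
            metadata.requestUpdate();
        }
        // either way the request itself can simply be attempted again
    }
}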
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.common.errors; + +/** + * Number of insync replicas for the partition is lower than min.insync.replicas + * This exception is raised when the low ISR size is discovered *after* the message + * was already appended to the log. Producer retries will cause duplicates. + */ +public class NotEnoughReplicasAfterAppendException extends RetriableException { + private static final long serialVersionUID = 1L; + + public NotEnoughReplicasAfterAppendException() { + super(); + } + + public NotEnoughReplicasAfterAppendException(String message, Throwable cause) { + super(message,cause); + } + + public NotEnoughReplicasAfterAppendException(String message) { + super(message); + } + + public NotEnoughReplicasAfterAppendException(Throwable cause) { + super(cause); + } + +} diff --git a/clients/src/main/java/org/apache/kafka/common/errors/NotEnoughReplicasException.java b/clients/src/main/java/org/apache/kafka/common/errors/NotEnoughReplicasException.java new file mode 100644 index 0000000000000..486d5155bbb1f --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/errors/NotEnoughReplicasException.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.common.errors; + +/** + * Number of insync replicas for the partition is lower than min.insync.replicas + */ +public class NotEnoughReplicasException extends RetriableException { + private static final long serialVersionUID = 1L; + + public NotEnoughReplicasException() { + super(); + } + + public NotEnoughReplicasException(String message, Throwable cause) { + super(message, cause); + } + + public NotEnoughReplicasException(String message) { + super(message); + } + + public NotEnoughReplicasException(Throwable cause) { + super(cause); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/errors/NotLeaderForPartitionException.java b/clients/src/main/java/org/apache/kafka/common/errors/NotLeaderForPartitionException.java index 5adc72ccf2d0c..ad9c77c41c492 100644 --- a/clients/src/main/java/org/apache/kafka/common/errors/NotLeaderForPartitionException.java +++ b/clients/src/main/java/org/apache/kafka/common/errors/NotLeaderForPartitionException.java @@ -1,22 +1,21 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.errors; -public class NotLeaderForPartitionException extends RetryableException { +/** + * This server is not the leader for the given partition + */ +public class NotLeaderForPartitionException extends InvalidMetadataException { private static final long serialVersionUID = 1L; diff --git a/clients/src/main/java/org/apache/kafka/common/errors/OffsetMetadataTooLarge.java b/clients/src/main/java/org/apache/kafka/common/errors/OffsetMetadataTooLarge.java index a3159bb1034e7..0be2f500685b0 100644 --- a/clients/src/main/java/org/apache/kafka/common/errors/OffsetMetadataTooLarge.java +++ b/clients/src/main/java/org/apache/kafka/common/errors/OffsetMetadataTooLarge.java @@ -1,21 +1,20 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
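The two ISR-related errors above differ in one important way that the javadoc calls out: the AfterAppend variant is raised only after the message already reached the leader's log, so blindly retrying it can produce duplicates. A sketch of how a caller might encode that distinction (shouldRetry and duplicatesAcceptable are invented; the exception types are from this patch):

import org.apache.kafka.common.errors.NotEnoughReplicasAfterAppendException;
import org.apache.kafka.common.errors.NotEnoughReplicasException;
import org.apache.kafka.common.errors.RetriableException;

public class IsrErrorSketch {

    static boolean shouldRetry(RetriableException e, boolean duplicatesAcceptable) {
        if (e instanceof NotEnoughReplicasAfterAppendException) {
            // the write already landed on the leader; a retry may append it twice
            return duplicatesAcceptable;
        }
        if (e instanceof NotEnoughReplicasException) {
            // nothing was appended; safe to retry once the ISR recovers
            return true;
        }
        return true; // other retriable errors: defer to the normal retry policy
    }
}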
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.errors; +/** + * The client has tried to save its offset with associated metadata larger than the maximum size allowed by the server. + */ public class OffsetMetadataTooLarge extends ApiException { private static final long serialVersionUID = 1L; diff --git a/clients/src/main/java/org/apache/kafka/common/errors/OffsetOutOfRangeException.java b/clients/src/main/java/org/apache/kafka/common/errors/OffsetOutOfRangeException.java index d01698a3efca7..fc7c6e3471b05 100644 --- a/clients/src/main/java/org/apache/kafka/common/errors/OffsetOutOfRangeException.java +++ b/clients/src/main/java/org/apache/kafka/common/errors/OffsetOutOfRangeException.java @@ -1,22 +1,22 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.errors; -public class OffsetOutOfRangeException extends ApiException { +/** + * This offset is either larger or smaller than the range of offsets the server has for the given partition. + * + */ +public class OffsetOutOfRangeException extends RetriableException { private static final long serialVersionUID = 1L; diff --git a/clients/src/main/java/org/apache/kafka/common/errors/RecordBatchTooLargeException.java b/clients/src/main/java/org/apache/kafka/common/errors/RecordBatchTooLargeException.java new file mode 100644 index 0000000000000..f3f3f27d34dfb --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/errors/RecordBatchTooLargeException.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.common.errors; + +/** + * This record batch is larger than the maximum allowable size + */ +public class RecordBatchTooLargeException extends ApiException { + + private static final long serialVersionUID = 1L; + + public RecordBatchTooLargeException() { + super(); + } + + public RecordBatchTooLargeException(String message, Throwable cause) { + super(message, cause); + } + + public RecordBatchTooLargeException(String message) { + super(message); + } + + public RecordBatchTooLargeException(Throwable cause) { + super(cause); + } + +} + diff --git a/clients/src/main/java/org/apache/kafka/common/errors/RecordTooLargeException.java b/clients/src/main/java/org/apache/kafka/common/errors/RecordTooLargeException.java index ce95ca04aa842..737b7f07b16a0 100644 --- a/clients/src/main/java/org/apache/kafka/common/errors/RecordTooLargeException.java +++ b/clients/src/main/java/org/apache/kafka/common/errors/RecordTooLargeException.java @@ -1,21 +1,20 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
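OffsetOutOfRangeException tells the caller that the requested position is outside what the broker still holds; a common reaction is to fall back to a known-valid position. A sketch under that assumption (PositionReset and seekToBeginning are invented names, not an API from this patch):

import org.apache.kafka.common.errors.OffsetOutOfRangeException;

public class OffsetResetSketch {

    /* hypothetical reset strategy supplied by the application */
    interface PositionReset {
        void seekToBeginning();
    }

    static void fetchWithReset(Runnable fetch, PositionReset reset) {
        try {
            fetch.run();
        } catch (OffsetOutOfRangeException e) {
            reset.seekToBeginning(); // pick a valid offset, then fetch again on the next loop
        }
    }
}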
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.errors; +/** + * This record is larger than the maximum allowable size + */ public class RecordTooLargeException extends ApiException { private static final long serialVersionUID = 1L; diff --git a/clients/src/main/java/org/apache/kafka/common/errors/RetriableException.java b/clients/src/main/java/org/apache/kafka/common/errors/RetriableException.java new file mode 100644 index 0000000000000..6c639a972d7e4 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/errors/RetriableException.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.common.errors; + +/** + * A retryable exception is a transient exception that if retried may succeed. + */ +public abstract class RetriableException extends ApiException { + + private static final long serialVersionUID = 1L; + + public RetriableException(String message, Throwable cause) { + super(message, cause); + } + + public RetriableException(String message) { + super(message); + } + + public RetriableException(Throwable cause) { + super(cause); + } + + public RetriableException() { + } + +} diff --git a/clients/src/main/java/org/apache/kafka/common/errors/RetryableException.java b/clients/src/main/java/org/apache/kafka/common/errors/RetryableException.java deleted file mode 100644 index c7f2f222f712a..0000000000000 --- a/clients/src/main/java/org/apache/kafka/common/errors/RetryableException.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.kafka.common.errors;
-
-/**
- * A retryable exception is an exception that is safe to retry. To be retryable an exception should be
- *
- *    1. Transient, there is no point retrying a error due to a non-existant topic or message too large
- *    2. Idempotent, the exception is known to not change any state on the server
- *
    - * A client may choose to retry any request they like, but exceptions extending this class are always safe and sane to - * retry. - */ -public abstract class RetryableException extends ApiException { - - private static final long serialVersionUID = 1L; - - public RetryableException(String message, Throwable cause) { - super(message, cause); - } - - public RetryableException(String message) { - super(message); - } - - public RetryableException(Throwable cause) { - super(cause); - } - - public RetryableException() { - } - -} diff --git a/clients/src/main/java/org/apache/kafka/common/errors/SerializationException.java b/clients/src/main/java/org/apache/kafka/common/errors/SerializationException.java new file mode 100644 index 0000000000000..00388d12794f0 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/errors/SerializationException.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.common.errors; + +import org.apache.kafka.common.KafkaException; + +/** + * Any exception during serialization in the producer + */ +public class SerializationException extends KafkaException { + + private static final long serialVersionUID = 1L; + + public SerializationException(String message, Throwable cause) { + super(message, cause); + } + + public SerializationException(String message) { + super(message); + } + + public SerializationException(Throwable cause) { + super(cause); + } + + public SerializationException() { + super(); + } + + /* avoid the expensive and useless stack trace for serialization exceptions */ + @Override + public Throwable fillInStackTrace() { + return this; + } + +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/common/errors/TimeoutException.java b/clients/src/main/java/org/apache/kafka/common/errors/TimeoutException.java index dffd64d19c35a..c7f569ca87369 100644 --- a/clients/src/main/java/org/apache/kafka/common/errors/TimeoutException.java +++ b/clients/src/main/java/org/apache/kafka/common/errors/TimeoutException.java @@ -1,22 +1,21 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
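SerializationException above is the producer-side counterpart of DeserializationException, with the same cheap-to-throw fillInStackTrace() override. A small sketch of the kind of code expected to throw it (the toBytes helper is invented for illustration):

import java.nio.charset.StandardCharsets;

import org.apache.kafka.common.errors.SerializationException;

public class SerializerSketch {

    static byte[] toBytes(Object value) {
        if (value == null)
            return null;
        if (value instanceof byte[])
            return (byte[]) value;
        if (value instanceof String)
            return ((String) value).getBytes(StandardCharsets.UTF_8);
        // cheap to throw: no stack trace is captured by this exception type
        throw new SerializationException("Don't know how to serialize a " + value.getClass().getName());
    }
}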
- * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.errors; -public class TimeoutException extends ApiException { +/** + * Indicates that a request timed out. + */ +public class TimeoutException extends RetriableException { private static final long serialVersionUID = 1L; diff --git a/clients/src/main/java/org/apache/kafka/common/errors/UnknownServerException.java b/clients/src/main/java/org/apache/kafka/common/errors/UnknownServerException.java index a0690fe2870bf..963ef081db51d 100644 --- a/clients/src/main/java/org/apache/kafka/common/errors/UnknownServerException.java +++ b/clients/src/main/java/org/apache/kafka/common/errors/UnknownServerException.java @@ -1,21 +1,22 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.errors; +/** + * An error occurred on the server for which the client doesn't have a corresponding error code. This is generally an + * unexpected error. 
+ * + */ public class UnknownServerException extends ApiException { private static final long serialVersionUID = 1L; diff --git a/clients/src/main/java/org/apache/kafka/common/errors/UnknownTopicOrPartitionException.java b/clients/src/main/java/org/apache/kafka/common/errors/UnknownTopicOrPartitionException.java index 73d1953cbe045..ec423bd01298c 100644 --- a/clients/src/main/java/org/apache/kafka/common/errors/UnknownTopicOrPartitionException.java +++ b/clients/src/main/java/org/apache/kafka/common/errors/UnknownTopicOrPartitionException.java @@ -1,22 +1,21 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.errors; -public class UnknownTopicOrPartitionException extends ApiException { +/** + * This topic/partition doesn't exist + */ +public class UnknownTopicOrPartitionException extends RetriableException { private static final long serialVersionUID = 1L; diff --git a/clients/src/main/java/org/apache/kafka/common/message/KafkaLZ4BlockInputStream.java b/clients/src/main/java/org/apache/kafka/common/message/KafkaLZ4BlockInputStream.java new file mode 100644 index 0000000000000..5be72fef1f976 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/message/KafkaLZ4BlockInputStream.java @@ -0,0 +1,233 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.common.message; + +import static org.apache.kafka.common.message.KafkaLZ4BlockOutputStream.LZ4_FRAME_INCOMPRESSIBLE_MASK; +import static org.apache.kafka.common.message.KafkaLZ4BlockOutputStream.LZ4_MAX_HEADER_LENGTH; +import static org.apache.kafka.common.message.KafkaLZ4BlockOutputStream.MAGIC; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.kafka.common.message.KafkaLZ4BlockOutputStream.BD; +import org.apache.kafka.common.message.KafkaLZ4BlockOutputStream.FLG; +import org.apache.kafka.common.utils.Utils; + +import net.jpountz.lz4.LZ4Exception; +import net.jpountz.lz4.LZ4Factory; +import net.jpountz.lz4.LZ4SafeDecompressor; +import net.jpountz.xxhash.XXHash32; +import net.jpountz.xxhash.XXHashFactory; + +/** + * A partial implementation of the v1.4.1 LZ4 Frame format. + * + * @see LZ4 Framing Format Spec + */ +public final class KafkaLZ4BlockInputStream extends FilterInputStream { + + public static final String PREMATURE_EOS = "Stream ended prematurely"; + public static final String NOT_SUPPORTED = "Stream unsupported"; + public static final String BLOCK_HASH_MISMATCH = "Block checksum mismatch"; + public static final String DESCRIPTOR_HASH_MISMATCH = "Stream frame descriptor corrupted"; + + private final LZ4SafeDecompressor decompressor; + private final XXHash32 checksum; + private final byte[] buffer; + private final byte[] compressedBuffer; + private final int maxBlockSize; + private FLG flg; + private BD bd; + private int bufferOffset; + private int bufferSize; + private boolean finished; + + /** + * Create a new {@link InputStream} that will decompress data using the LZ4 algorithm. + * + * @param in The stream to decompress + * @throws IOException + */ + public KafkaLZ4BlockInputStream(InputStream in) throws IOException { + super(in); + decompressor = LZ4Factory.fastestInstance().safeDecompressor(); + checksum = XXHashFactory.fastestInstance().hash32(); + readHeader(); + maxBlockSize = bd.getBlockMaximumSize(); + buffer = new byte[maxBlockSize]; + compressedBuffer = new byte[maxBlockSize]; + bufferOffset = 0; + bufferSize = 0; + finished = false; + } + + /** + * Reads the magic number and frame descriptor from the underlying {@link InputStream}. + * + * @throws IOException + */ + private void readHeader() throws IOException { + byte[] header = new byte[LZ4_MAX_HEADER_LENGTH]; + + // read first 6 bytes into buffer to check magic and FLG/BD descriptor flags + bufferOffset = 6; + if (in.read(header, 0, bufferOffset) != bufferOffset) { + throw new IOException(PREMATURE_EOS); + } + + if (MAGIC != Utils.readUnsignedIntLE(header, bufferOffset-6)) { + throw new IOException(NOT_SUPPORTED); + } + flg = FLG.fromByte(header[bufferOffset-2]); + bd = BD.fromByte(header[bufferOffset-1]); + // TODO read uncompressed content size, update flg.validate() + // TODO read dictionary id, update flg.validate() + + // check stream descriptor hash + byte hash = (byte) ((checksum.hash(header, 0, bufferOffset, 0) >> 8) & 0xFF); + header[bufferOffset++] = (byte) in.read(); + if (hash != header[bufferOffset-1]) { + throw new IOException(DESCRIPTOR_HASH_MISMATCH); + } + } + + /** + * Decompresses (if necessary) buffered data, optionally computes and validates a XXHash32 checksum, + * and writes the result to a buffer. 
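For orientation, this is how the new input stream is meant to be consumed: wrap the raw framed stream and read plain bytes out of it. A usage sketch only, not part of the patch; only KafkaLZ4BlockInputStream comes from this diff:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.kafka.common.message.KafkaLZ4BlockInputStream;

public class Lz4DecompressSketch {

    static byte[] decompress(byte[] framed) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        InputStream in = new KafkaLZ4BlockInputStream(new ByteArrayInputStream(framed));
        try {
            byte[] chunk = new byte[8192];
            int n;
            while ((n = in.read(chunk, 0, chunk.length)) != -1)
                out.write(chunk, 0, n);
        } finally {
            in.close();
        }
        return out.toByteArray();
    }
}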
+ * + * @throws IOException + */ + private void readBlock() throws IOException { + int blockSize = Utils.readUnsignedIntLE(in); + + // Check for EndMark + if (blockSize == 0) { + finished = true; + // TODO implement content checksum, update flg.validate() + return; + } else if (blockSize > maxBlockSize) { + throw new IOException(String.format("Block size %s exceeded max: %s", blockSize, maxBlockSize)); + } + + boolean compressed = (blockSize & LZ4_FRAME_INCOMPRESSIBLE_MASK) == 0; + byte[] bufferToRead; + if (compressed) { + bufferToRead = compressedBuffer; + } else { + blockSize &= ~LZ4_FRAME_INCOMPRESSIBLE_MASK; + bufferToRead = buffer; + bufferSize = blockSize; + } + + if (in.read(bufferToRead, 0, blockSize) != blockSize) { + throw new IOException(PREMATURE_EOS); + } + + // verify checksum + if (flg.isBlockChecksumSet() && Utils.readUnsignedIntLE(in) != checksum.hash(bufferToRead, 0, blockSize, 0)) { + throw new IOException(BLOCK_HASH_MISMATCH); + } + + if (compressed) { + try { + bufferSize = decompressor.decompress(compressedBuffer, 0, blockSize, buffer, 0, maxBlockSize); + } catch (LZ4Exception e) { + throw new IOException(e); + } + } + + bufferOffset = 0; + } + + @Override + public int read() throws IOException { + if (finished) { + return -1; + } + if (available() == 0) { + readBlock(); + } + if (finished) { + return -1; + } + int value = buffer[bufferOffset++] & 0xFF; + + return value; + } + + @Override + public int read(byte b[], int off, int len) throws IOException { + net.jpountz.util.Utils.checkRange(b, off, len); + if (finished) { + return -1; + } + if (available() == 0) { + readBlock(); + } + if (finished) { + return -1; + } + len = Math.min(len, available()); + System.arraycopy(buffer, bufferOffset, b, off, len); + bufferOffset += len; + return len; + } + + @Override + public long skip(long n) throws IOException { + if (finished) { + return 0; + } + if (available() == 0) { + readBlock(); + } + if (finished) { + return 0; + } + n = Math.min(n, available()); + bufferOffset += n; + return n; + } + + @Override + public int available() throws IOException { + return bufferSize - bufferOffset; + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public synchronized void mark(int readlimit) { + throw new RuntimeException("mark not supported"); + } + + @Override + public synchronized void reset() throws IOException { + throw new RuntimeException("reset not supported"); + } + + @Override + public boolean markSupported() { + return false; + } + +} diff --git a/clients/src/main/java/org/apache/kafka/common/message/KafkaLZ4BlockOutputStream.java b/clients/src/main/java/org/apache/kafka/common/message/KafkaLZ4BlockOutputStream.java new file mode 100644 index 0000000000000..e5b9e433e14ef --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/message/KafkaLZ4BlockOutputStream.java @@ -0,0 +1,387 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.common.message; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.kafka.common.utils.Utils; + +import net.jpountz.lz4.LZ4Compressor; +import net.jpountz.lz4.LZ4Factory; +import net.jpountz.xxhash.XXHash32; +import net.jpountz.xxhash.XXHashFactory; + +/** + * A partial implementation of the v1.4.1 LZ4 Frame format. + * + * @see LZ4 Framing Format Spec + */ +public final class KafkaLZ4BlockOutputStream extends FilterOutputStream { + + public static final int MAGIC = 0x184D2204; + public static final int LZ4_MAX_HEADER_LENGTH = 19; + public static final int LZ4_FRAME_INCOMPRESSIBLE_MASK = 0x80000000; + + public static final String CLOSED_STREAM = "The stream is already closed"; + + public static final int BLOCKSIZE_64KB = 4; + public static final int BLOCKSIZE_256KB = 5; + public static final int BLOCKSIZE_1MB = 6; + public static final int BLOCKSIZE_4MB = 7; + + private final LZ4Compressor compressor; + private final XXHash32 checksum; + private final FLG flg; + private final BD bd; + private final byte[] buffer; + private final byte[] compressedBuffer; + private final int maxBlockSize; + private int bufferOffset; + private boolean finished; + + /** + * Create a new {@link OutputStream} that will compress data using the LZ4 algorithm. + * + * @param out The output stream to compress + * @param blockSize Default: 4. The block size used during compression. 4=64kb, 5=256kb, 6=1mb, 7=4mb. All other values will generate an exception + * @param blockChecksum Default: false. When true, a XXHash32 checksum is computed and appended to the stream for every block of data + * @throws IOException + */ + public KafkaLZ4BlockOutputStream(OutputStream out, int blockSize, boolean blockChecksum) throws IOException { + super(out); + compressor = LZ4Factory.fastestInstance().fastCompressor(); + checksum = XXHashFactory.fastestInstance().hash32(); + bd = new BD(blockSize); + flg = new FLG(blockChecksum); + bufferOffset = 0; + maxBlockSize = bd.getBlockMaximumSize(); + buffer = new byte[maxBlockSize]; + compressedBuffer = new byte[compressor.maxCompressedLength(maxBlockSize)]; + finished = false; + writeHeader(); + } + + /** + * Create a new {@link OutputStream} that will compress data using the LZ4 algorithm. + * + * @param out The stream to compress + * @param blockSize Default: 4. The block size used during compression. 4=64kb, 5=256kb, 6=1mb, 7=4mb. All other values will generate an exception + * @throws IOException + */ + public KafkaLZ4BlockOutputStream(OutputStream out, int blockSize) throws IOException { + this(out, blockSize, false); + } + + /** + * Create a new {@link OutputStream} that will compress data using the LZ4 algorithm. + * + * @param out The output stream to compress + * @throws IOException + */ + public KafkaLZ4BlockOutputStream(OutputStream out) throws IOException { + this(out, BLOCKSIZE_64KB); + } + + /** + * Writes the magic number and frame descriptor to the underlying {@link OutputStream}. 
+ * + * @throws IOException + */ + private void writeHeader() throws IOException { + Utils.writeUnsignedIntLE(buffer, 0, MAGIC); + bufferOffset = 4; + buffer[bufferOffset++] = flg.toByte(); + buffer[bufferOffset++] = bd.toByte(); + // TODO write uncompressed content size, update flg.validate() + // TODO write dictionary id, update flg.validate() + // compute checksum on all descriptor fields + int hash = (checksum.hash(buffer, 0, bufferOffset, 0) >> 8) & 0xFF; + buffer[bufferOffset++] = (byte) hash; + // write out frame descriptor + out.write(buffer, 0, bufferOffset); + bufferOffset = 0; + } + + /** + * Compresses buffered data, optionally computes an XXHash32 checksum, and writes + * the result to the underlying {@link OutputStream}. + * + * @throws IOException + */ + private void writeBlock() throws IOException { + if (bufferOffset == 0) { + return; + } + + int compressedLength = compressor.compress(buffer, 0, bufferOffset, compressedBuffer, 0); + byte[] bufferToWrite = compressedBuffer; + int compressMethod = 0; + + // Store block uncompressed if compressed length is greater (incompressible) + if (compressedLength >= bufferOffset) { + bufferToWrite = buffer; + compressedLength = bufferOffset; + compressMethod = LZ4_FRAME_INCOMPRESSIBLE_MASK; + } + + // Write content + Utils.writeUnsignedIntLE(out, compressedLength | compressMethod); + out.write(bufferToWrite, 0, compressedLength); + + // Calculate and write block checksum + if (flg.isBlockChecksumSet()) { + int hash = checksum.hash(bufferToWrite, 0, compressedLength, 0); + Utils.writeUnsignedIntLE(out, hash); + } + bufferOffset = 0; + } + + /** + * Similar to the {@link #writeBlock()} method. Writes a 0-length block + * (without block checksum) to signal the end of the block stream. + * + * @throws IOException + */ + private void writeEndMark() throws IOException { + Utils.writeUnsignedIntLE(out, 0); + // TODO implement content checksum, update flg.validate() + finished = true; + } + + @Override + public void write(int b) throws IOException { + ensureNotFinished(); + if (bufferOffset == maxBlockSize) { + writeBlock(); + } + buffer[bufferOffset++] = (byte) b; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + net.jpountz.util.Utils.checkRange(b, off, len); + ensureNotFinished(); + + int bufferRemainingLength = maxBlockSize - bufferOffset; + // while b will fill the buffer + while (len > bufferRemainingLength) { + // fill remaining space in buffer + System.arraycopy(b, off, buffer, bufferOffset, bufferRemainingLength); + bufferOffset = maxBlockSize; + writeBlock(); + // compute new offset and length + off += bufferRemainingLength; + len -= bufferRemainingLength; + bufferRemainingLength = maxBlockSize; + } + + System.arraycopy(b, off, buffer, bufferOffset, len); + bufferOffset += len; + } + + @Override + public void flush() throws IOException { + if (!finished) { + writeBlock(); + } + if (out != null) { + out.flush(); + } + } + + /** + * A simple state check to ensure the stream is still open. 
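And the matching compression side: wrap the destination stream, write, and close (closing is what emits the end mark, as the close() method further below shows). A usage sketch only; BLOCKSIZE_64KB and the three-argument constructor are as declared above:

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.kafka.common.message.KafkaLZ4BlockOutputStream;

public class Lz4CompressSketch {

    static byte[] compress(byte[] payload) throws IOException {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        KafkaLZ4BlockOutputStream out =
                new KafkaLZ4BlockOutputStream(sink, KafkaLZ4BlockOutputStream.BLOCKSIZE_64KB, true);
        try {
            out.write(payload, 0, payload.length);
        } finally {
            out.close(); // writes the 0-length end mark, flushes, and closes the sink
        }
        return sink.toByteArray();
    }
}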
+ */ + private void ensureNotFinished() { + if (finished) { + throw new IllegalStateException(CLOSED_STREAM); + } + } + + @Override + public void close() throws IOException { + if (!finished) { + writeEndMark(); + flush(); + finished = true; + } + if (out != null) { + out.close(); + out = null; + } + } + + public static class FLG { + + private static final int VERSION = 1; + + private final int presetDictionary; + private final int reserved1; + private final int contentChecksum; + private final int contentSize; + private final int blockChecksum; + private final int blockIndependence; + private final int version; + + public FLG() { + this(false); + } + + public FLG(boolean blockChecksum) { + this(0, 0, 0, 0, blockChecksum ? 1 : 0, 1, VERSION); + } + + private FLG(int presetDictionary, int reserved1, int contentChecksum, + int contentSize, int blockChecksum, int blockIndependence, int version) { + this.presetDictionary = presetDictionary; + this.reserved1 = reserved1; + this.contentChecksum = contentChecksum; + this.contentSize = contentSize; + this.blockChecksum = blockChecksum; + this.blockIndependence = blockIndependence; + this.version = version; + validate(); + } + + public static FLG fromByte(byte flg) { + int presetDictionary = (flg >>> 0) & 1; + int reserved1 = (flg >>> 1) & 1; + int contentChecksum = (flg >>> 2) & 1; + int contentSize = (flg >>> 3) & 1; + int blockChecksum = (flg >>> 4) & 1; + int blockIndependence = (flg >>> 5) & 1; + int version = (flg >>> 6) & 3; + + return new FLG(presetDictionary, reserved1, contentChecksum, + contentSize, blockChecksum, blockIndependence, version); + } + + public byte toByte() { + return (byte) ( + ((presetDictionary & 1) << 0) + | ((reserved1 & 1) << 1) + | ((contentChecksum & 1) << 2) + | ((contentSize & 1) << 3) + | ((blockChecksum & 1) << 4) + | ((blockIndependence & 1) << 5) + | ((version & 3) << 6) ); + } + + private void validate() { + if (presetDictionary != 0) { + throw new RuntimeException("Preset dictionary is unsupported"); + } + if (reserved1 != 0) { + throw new RuntimeException("Reserved1 field must be 0"); + } + if (contentChecksum != 0) { + throw new RuntimeException("Content checksum is unsupported"); + } + if (contentSize != 0) { + throw new RuntimeException("Content size is unsupported"); + } + if (blockIndependence != 1) { + throw new RuntimeException("Dependent block stream is unsupported"); + } + if (version != VERSION) { + throw new RuntimeException(String.format("Version %d is unsupported", version)); + } + } + + public boolean isPresetDictionarySet() { + return presetDictionary == 1; + } + + public boolean isContentChecksumSet() { + return contentChecksum == 1; + } + + public boolean isContentSizeSet() { + return contentSize == 1; + } + + public boolean isBlockChecksumSet() { + return blockChecksum == 1; + } + + public boolean isBlockIndependenceSet() { + return blockIndependence == 1; + } + + public int getVersion() { + return version; + } + } + + public static class BD { + + private final int reserved2; + private final int blockSizeValue; + private final int reserved3; + + public BD() { + this(0, BLOCKSIZE_64KB, 0); + } + + public BD(int blockSizeValue) { + this(0, blockSizeValue, 0); + } + + private BD(int reserved2, int blockSizeValue, int reserved3) { + this.reserved2 = reserved2; + this.blockSizeValue = blockSizeValue; + this.reserved3 = reserved3; + validate(); + } + + public static BD fromByte(byte bd) { + int reserved2 = (bd >>> 0) & 15; + int blockMaximumSize = (bd >>> 4) & 7; + int reserved3 = (bd >>> 7) & 
1; + + return new BD(reserved2, blockMaximumSize, reserved3); + } + + private void validate() { + if (reserved2 != 0) { + throw new RuntimeException("Reserved2 field must be 0"); + } + if (blockSizeValue < 4 || blockSizeValue > 7) { + throw new RuntimeException("Block size value must be between 4 and 7"); + } + if (reserved3 != 0) { + throw new RuntimeException("Reserved3 field must be 0"); + } + } + + // 2^(2n+8) + public int getBlockMaximumSize() { + return (1 << ((2 * blockSizeValue) + 8)); + } + + public byte toByte() { + return (byte) ( + ((reserved2 & 15) << 0) + | ((blockSizeValue & 7) << 4) + | ((reserved3 & 1) << 7) ); + } + } + +} diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/JmxReporter.java b/clients/src/main/java/org/apache/kafka/common/metrics/JmxReporter.java index e08c349a6cd76..3c312011a7ff7 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/JmxReporter.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/JmxReporter.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
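A worked example of the "2^(2n+8)" block-size encoding in BD.getBlockMaximumSize() above: the four legal codes map to 64 KB, 256 KB, 1 MB and 4 MB. Illustrative main method only; BD is the nested class from this patch:

import org.apache.kafka.common.message.KafkaLZ4BlockOutputStream.BD;

public class BlockSizeSketch {

    public static void main(String[] args) {
        // 1 << (2*4 + 8) = 65536, 1 << 18 = 262144, 1 << 20 = 1048576, 1 << 22 = 4194304
        for (int code = 4; code <= 7; code++)
            System.out.println("block size code " + code + " -> " + new BD(code).getBlockMaximumSize() + " bytes");
    }
}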
*/ package org.apache.kafka.common.metrics; @@ -36,14 +32,17 @@ import javax.management.ReflectionException; import org.apache.kafka.common.KafkaException; - +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Register metrics in JMX as dynamic mbeans based on the metric names */ public class JmxReporter implements MetricsReporter { - private final String prefix; + private static final Logger log = LoggerFactory.getLogger(JmxReporter.class); + private static final Object lock = new Object(); + private String prefix; private final Map mbeans = new HashMap(); public JmxReporter() { @@ -58,18 +57,25 @@ public JmxReporter(String prefix) { } @Override - public synchronized void init(List metrics) { - for (KafkaMetric metric : metrics) - addAttribute(metric); - for (KafkaMbean mbean : mbeans.values()) - reregister(mbean); + public void configure(Map configs) { + } + @Override + public void init(List metrics) { + synchronized (lock) { + for (KafkaMetric metric : metrics) + addAttribute(metric); + for (KafkaMbean mbean : mbeans.values()) + reregister(mbean); + } } @Override - public synchronized void metricChange(KafkaMetric metric) { - KafkaMbean mbean = addAttribute(metric); - reregister(mbean); + public void metricChange(KafkaMetric metric) { + synchronized (lock) { + KafkaMbean mbean = addAttribute(metric); + reregister(mbean); + } } private KafkaMbean addAttribute(KafkaMetric metric) { @@ -86,10 +92,11 @@ private KafkaMbean addAttribute(KafkaMetric metric) { } } - public synchronized void close() { - for (KafkaMbean mbean : this.mbeans.values()) - unregister(mbean); - + public void close() { + synchronized (lock) { + for (KafkaMbean mbean : this.mbeans.values()) + unregister(mbean); + } } private void unregister(KafkaMbean mbean) { @@ -160,7 +167,7 @@ public AttributeList getAttributes(String[] names) { list.add(new Attribute(name, getAttribute(name))); return list; } catch (Exception e) { - e.printStackTrace(); + log.error("Error getting JMX attribute: ", e); return new AttributeList(); } } diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/KafkaMetric.java b/clients/src/main/java/org/apache/kafka/common/metrics/KafkaMetric.java index b2426ac927468..a7458b50cb16f 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/KafkaMetric.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/KafkaMetric.java @@ -55,12 +55,12 @@ public String description() { @Override public double value() { synchronized (this.lock) { - return value(time.nanoseconds()); + return value(time.milliseconds()); } } - double value(long time) { - return this.measurable.measure(config, time); + double value(long timeMs) { + return this.measurable.measure(config, timeMs); } public void config(MetricConfig config) { diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/Measurable.java b/clients/src/main/java/org/apache/kafka/common/metrics/Measurable.java index 0f405c3f81314..79f61bca4c3fa 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/Measurable.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/Measurable.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
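The JmxReporter change above swaps synchronized instance methods for a single class-wide lock, so every reporter instance serializes its MBean (de)registrations against the shared platform MBean server. The pattern in isolation, as a sketch rather than the Kafka class:

import java.util.HashMap;
import java.util.Map;

public class SharedLockSketch {

    private static final Object LOCK = new Object(); // one lock shared by all instances

    private final Map<String, Object> beans = new HashMap<String, Object>();

    public void register(String name, Object bean) {
        synchronized (LOCK) { // not synchronized(this): other instances must wait too
            beans.put(name, bean);
            // ... register/re-register with the MBean server here ...
        }
    }
}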
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.metrics; @@ -24,7 +20,7 @@ public interface Measurable { /** * Measure this quantity and return the result as a double * @param config The configuration for this metric - * @param now The time the measurement is being taken + * @param now The POSIX time in milliseconds the measurement is being taken * @return The measured value */ public double measure(MetricConfig config, long now); diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/MetricConfig.java b/clients/src/main/java/org/apache/kafka/common/metrics/MetricConfig.java index 4d14fbcc68a80..dfa1b0a11042a 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/MetricConfig.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/MetricConfig.java @@ -26,7 +26,7 @@ public class MetricConfig { private Quota quota; private int samples; private long eventWindow; - private long timeWindowNs; + private long timeWindowMs; private TimeUnit unit; public MetricConfig() { @@ -34,7 +34,7 @@ public MetricConfig() { this.quota = null; this.samples = 2; this.eventWindow = Long.MAX_VALUE; - this.timeWindowNs = TimeUnit.NANOSECONDS.convert(30, TimeUnit.SECONDS); + this.timeWindowMs = TimeUnit.MILLISECONDS.convert(30, TimeUnit.SECONDS); this.unit = TimeUnit.SECONDS; } @@ -56,12 +56,12 @@ public MetricConfig eventWindow(long window) { return this; } - public long timeWindowNs() { - return timeWindowNs; + public long timeWindowMs() { + return timeWindowMs; } public MetricConfig timeWindow(long window, TimeUnit unit) { - this.timeWindowNs = TimeUnit.NANOSECONDS.convert(window, unit); + this.timeWindowMs = TimeUnit.MILLISECONDS.convert(window, unit); return this; } diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/Metrics.java b/clients/src/main/java/org/apache/kafka/common/metrics/Metrics.java index 6db2dfbe94c94..49be4019ac038 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/Metrics.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/Metrics.java @@ -1,32 +1,27 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.metrics; import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import org.apache.kafka.common.utils.CopyOnWriteMap; import org.apache.kafka.common.utils.SystemTime; import org.apache.kafka.common.utils.Time; import org.apache.kafka.common.utils.Utils; - /** * A registry of sensors and metrics. *
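To make the new registry API easier to follow (the no-parent sensor(String) overload and the getSensor(String) lookup are added in the hunks below), here is a rough usage sketch; the sensor names, metric names and descriptions are illustrative only, not part of this patch:

    Metrics metrics = new Metrics();                        // default config and system clock
    Sensor parent = metrics.sensor("all-requests");         // get-or-create, no parents
    Sensor child = metrics.sensor("get-requests", parent);  // values recorded here also reach the parent
    child.add("get-request-rate", "Illustrative per-second rate.", new Rate());
    parent.add("request-rate", "Illustrative aggregate rate.", new Rate());
    child.record(1.0);                                      // updates both sensors
    Sensor found = metrics.getSensor("get-requests");       // returns null if no such sensor exists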

    @@ -67,7 +62,7 @@ public Metrics() { * Create a metrics repository with no metric reporters and default configuration. */ public Metrics(Time time) { - this(new MetricConfig(), new ArrayList(), time); + this(new MetricConfig(), new ArrayList(0), time); } /** @@ -87,8 +82,8 @@ public Metrics(MetricConfig defaultConfig) { */ public Metrics(MetricConfig defaultConfig, List reporters, Time time) { this.config = defaultConfig; - this.sensors = new ConcurrentHashMap(); - this.metrics = new ConcurrentHashMap(); + this.sensors = new CopyOnWriteMap(); + this.metrics = new CopyOnWriteMap(); this.reporters = Utils.notNull(reporters); this.time = time; for (MetricsReporter reporter : reporters) @@ -96,8 +91,26 @@ public Metrics(MetricConfig defaultConfig, List reporters, Time } /** - * Create a sensor with the given unique name and zero or more parent sensors. All parent sensors will receive every - * value recorded with this sensor. + * Get the sensor with the given name if it exists + * @param name The name of the sensor + * @return Return the sensor or null if no such sensor exists + */ + public Sensor getSensor(String name) { + return this.sensors.get(Utils.notNull(name)); + } + + /** + * Get or create a sensor with the given unique name and no parent sensors. + * @param name The sensor name + * @return The sensor + */ + public Sensor sensor(String name) { + return sensor(name, null, (Sensor[]) null); + } + + /** + * Get or create a sensor with the given unique name and zero or more parent sensors. All parent sensors will + * receive every value recorded with this sensor. * @param name The name of the sensor * @param parents The parent sensors * @return The sensor that is created @@ -107,15 +120,15 @@ public Sensor sensor(String name, Sensor... parents) { } /** - * Create a sensor with the given unique name and zero or more parent sensors. All parent sensors will receive every - * value recorded with this sensor. + * Get or create a sensor with the given unique name and zero or more parent sensors. All parent sensors will + * receive every value recorded with this sensor. * @param name The name of the sensor * @param config A default configuration to use for this sensor for metrics that don't have their own config * @param parents The parent sensors * @return The sensor that is created */ public synchronized Sensor sensor(String name, MetricConfig config, Sensor... parents) { - Sensor s = this.sensors.get(Utils.notNull(name)); + Sensor s = getSensor(name); if (s == null) { s = new Sensor(this, name, parents, config == null ? this.config : config, time); this.sensors.put(name, s); diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/MetricsReporter.java b/clients/src/main/java/org/apache/kafka/common/metrics/MetricsReporter.java index 2c395b1f8f343..7acc19e4e9380 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/MetricsReporter.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/MetricsReporter.java @@ -1,27 +1,25 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.metrics; import java.util.List; +import org.apache.kafka.common.Configurable; + /** - * A plugin interface to allow things to listen as new metrics are created so they can be reported + * A plugin interface to allow things to listen as new metrics are created so they can be reported. */ -public interface MetricsReporter { +public interface MetricsReporter extends Configurable { /** * This is called when the reporter is first registered to initially register all existing metrics diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java b/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java index 7e4849b7a1480..25c1faf2887ea 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.metrics; @@ -26,7 +22,6 @@ import org.apache.kafka.common.utils.Time; import org.apache.kafka.common.utils.Utils; - /** * A sensor applies a continuous sequence of numerical values to a set of associated metrics. For example a sensor on * message size would record a sequence of message sizes using the {@link #record(double)} api and would maintain a set @@ -46,7 +41,7 @@ public final class Sensor { super(); this.registry = registry; this.name = Utils.notNull(name); - this.parents = parents; + this.parents = parents == null ? new Sensor[0] : parents; this.metrics = new ArrayList(); this.stats = new ArrayList(); this.config = config; @@ -83,30 +78,42 @@ public void record() { * bound */ public void record(double value) { - record(value, time.nanoseconds()); + record(value, time.milliseconds()); } - private void record(double value, long time) { + /** + * Record a value at a known time. This method is slightly faster than {@link #record(double)} since it will reuse + * the time stamp. + * @param value The value we are recording + * @param timeMs The current POSIX time in milliseconds + * @throws QuotaViolationException if recording this value moves a metric beyond its configured maximum or minimum + * bound + */ + public void record(double value, long timeMs) { synchronized (this) { // increment all the stats for (int i = 0; i < this.stats.size(); i++) - this.stats.get(i).record(config, value, time); - checkQuotas(time); - + this.stats.get(i).record(config, value, timeMs); + checkQuotas(timeMs); } for (int i = 0; i < parents.length; i++) - parents[i].record(value, time); + parents[i].record(value, timeMs); } - private void checkQuotas(long time) { + /** + * Check if we have violated our quota for any metric that has a configured quota + * @param timeMs + */ + private void checkQuotas(long timeMs) { for (int i = 0; i < this.metrics.size(); i++) { KafkaMetric metric = this.metrics.get(i); MetricConfig config = metric.config(); if (config != null) { Quota quota = config.quota(); - if (quota != null) - if (!quota.acceptable(metric.value(time))) + if (quota != null) { + if (!quota.acceptable(metric.value(timeMs))) throw new QuotaViolationException("Metric " + metric.name() + " is in violation of its quota of " + quota.bound()); + } } } } diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/Stat.java b/clients/src/main/java/org/apache/kafka/common/metrics/Stat.java index e02389c969f52..0eb7ab2ad1a66 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/Stat.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/Stat.java @@ -25,8 +25,8 @@ public interface Stat { * Record the given value * @param config The configuration to use for this metric * @param value The value to record - * @param time The time this value occurred + * @param timeMs The POSIX time in milliseconds this value occurred */ - public void record(MetricConfig config, double value, long time); + public void record(MetricConfig config, double value, long timeMs); } diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Avg.java 
b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Avg.java index 51725b278495f..ed6767f369e02 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Avg.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Avg.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.metrics.stats; @@ -20,7 +16,6 @@ import org.apache.kafka.common.metrics.MetricConfig; - /** * A {@link SampledStat} that maintains a simple average over its samples. */ diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Count.java b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Count.java index 3cdd1d054e0b9..90c0bf549a9ec 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Count.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Count.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. 
The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.metrics.stats; @@ -20,7 +16,6 @@ import org.apache.kafka.common.metrics.MetricConfig; - /** * A {@link SampledStat} that maintains a simple count of what it has seen. */ diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Max.java b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Max.java index bba59721faa14..6bbb0a3a706c5 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Max.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Max.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.metrics.stats; @@ -20,7 +16,6 @@ import org.apache.kafka.common.metrics.MetricConfig; - /** * A {@link SampledStat} that gives the max over its samples. */ diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Min.java b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Min.java index d370049a7f1c0..9f74417193a04 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Min.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Min.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.metrics.stats; @@ -20,7 +16,6 @@ import org.apache.kafka.common.metrics.MetricConfig; - /** * A {@link SampledStat} that gives the min over its samples. */ diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Percentiles.java b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Percentiles.java index 4d549167ea7c1..c70d577ada8c0 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Percentiles.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Percentiles.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
*/ package org.apache.kafka.common.metrics.stats; @@ -26,7 +22,6 @@ import org.apache.kafka.common.metrics.stats.Histogram.ConstantBinScheme; import org.apache.kafka.common.metrics.stats.Histogram.LinearBinScheme; - /** * A compound stat that reports one or more percentiles */ @@ -74,7 +69,7 @@ public double measure(MetricConfig config, long now) { } public double value(MetricConfig config, long now, double quantile) { - timeoutObsoleteSamples(config, now); + purgeObsoleteSamples(config, now); float count = 0.0f; for (Sample sample : this.samples) count += sample.eventCount; @@ -99,12 +94,12 @@ public double combine(List samples, MetricConfig config, long now) { } @Override - protected HistogramSample newSample(long now) { - return new HistogramSample(this.binScheme, now); + protected HistogramSample newSample(long timeMs) { + return new HistogramSample(this.binScheme, timeMs); } @Override - protected void update(Sample sample, MetricConfig config, double value, long now) { + protected void update(Sample sample, MetricConfig config, double value, long timeMs) { HistogramSample hist = (HistogramSample) sample; hist.histogram.record(value); } diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Rate.java b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Rate.java index 3b0454f26490d..a5838b3894906 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Rate.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Rate.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.metrics.stats; @@ -22,12 +18,11 @@ import org.apache.kafka.common.metrics.MeasurableStat; import org.apache.kafka.common.metrics.MetricConfig; - /** - * The rate of the given quanitity. By default this is the total observed over a set of samples from a sampled statistic - * divided by the ellapsed time over the sample windows. 
Alternative {@link SampledStat} implementations can be - * provided, however, to record the rate of occurences (e.g. the count of values measured over the time interval) or - * other such values. + * The rate of the given quantity. By default this is the total observed over a set of samples from a sampled statistic + * divided by the elapsed time over the sample windows. Alternative {@link SampledStat} implementations can be provided, + * however, to record the rate of occurrences (e.g. the count of values measured over the time interval) or other such + * values. */ public class Rate implements MeasurableStat { @@ -42,6 +37,10 @@ public Rate(TimeUnit unit) { this(unit, new SampledTotal()); } + public Rate(SampledStat stat) { + this(TimeUnit.SECONDS, stat); + } + public Rate(TimeUnit unit, SampledStat stat) { this.stat = stat; this.unit = unit; @@ -52,32 +51,33 @@ public String unitName() { } @Override - public void record(MetricConfig config, double value, long time) { - this.stat.record(config, value, time); + public void record(MetricConfig config, double value, long timeMs) { + this.stat.record(config, value, timeMs); } @Override public double measure(MetricConfig config, long now) { - double ellapsed = convert(now - stat.oldest().lastWindow); - return stat.measure(config, now) / ellapsed; + double value = stat.measure(config, now); + double elapsed = convert(now - stat.oldest(now).lastWindowMs); + return value / elapsed; } private double convert(long time) { switch (unit) { case NANOSECONDS: - return time; + return time * 1000.0 * 1000.0; case MICROSECONDS: - return time / 1000.0; + return time * 1000.0; case MILLISECONDS: - return time / (1000.0 * 1000.0); + return time; case SECONDS: - return time / (1000.0 * 1000.0 * 1000.0); + return time / (1000.0); case MINUTES: - return time / (60.0 * 1000.0 * 1000.0 * 1000.0); + return time / (60.0 * 1000.0); case HOURS: - return time / (60.0 * 60.0 * 1000.0 * 1000.0 * 1000.0); + return time / (60.0 * 60.0 * 1000.0); case DAYS: - return time / (24.0 * 60.0 * 60.0 * 1000.0 * 1000.0 * 1000.0); + return time / (24.0 * 60.0 * 60.0 * 1000.0); default: throw new IllegalStateException("Unknown unit: " + unit); } @@ -90,7 +90,7 @@ public SampledTotal() { } @Override - protected void update(Sample sample, MetricConfig config, double value, long now) { + protected void update(Sample sample, MetricConfig config, double value, long timeMs) { sample.value += value; } diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/stats/SampledStat.java b/clients/src/main/java/org/apache/kafka/common/metrics/stats/SampledStat.java index f8b413a8c273c..b341b7daaa102 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/stats/SampledStat.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/stats/SampledStat.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.metrics.stats; @@ -22,7 +18,6 @@ import org.apache.kafka.common.metrics.MeasurableStat; import org.apache.kafka.common.metrics.MetricConfig; - /** * A SampledStat records a single scalar value measured over one or more samples. Each sample is recorded over a * configurable window. The window can be defined by number of events or ellapsed time (or both, if both are given the @@ -45,57 +40,65 @@ public SampledStat(double initialValue) { } @Override - public void record(MetricConfig config, double value, long now) { - Sample sample = current(now); - if (sample.isComplete(now, config)) - sample = advance(config, now); - update(sample, config, value, now); + public void record(MetricConfig config, double value, long timeMs) { + Sample sample = current(timeMs); + if (sample.isComplete(timeMs, config)) + sample = advance(config, timeMs); + update(sample, config, value, timeMs); sample.eventCount += 1; } - private Sample advance(MetricConfig config, long now) { + private Sample advance(MetricConfig config, long timeMs) { this.current = (this.current + 1) % config.samples(); if (this.current >= samples.size()) { - Sample sample = newSample(now); + Sample sample = newSample(timeMs); this.samples.add(sample); return sample; } else { - Sample sample = current(now); - sample.reset(now); + Sample sample = current(timeMs); + sample.reset(timeMs); return sample; } } - protected Sample newSample(long now) { - return new Sample(this.initialValue, now); + protected Sample newSample(long timeMs) { + return new Sample(this.initialValue, timeMs); } @Override public double measure(MetricConfig config, long now) { - timeoutObsoleteSamples(config, now); + purgeObsoleteSamples(config, now); return combine(this.samples, config, now); } - public Sample current(long now) { + public Sample current(long timeMs) { if (samples.size() == 0) - this.samples.add(newSample(now)); + this.samples.add(newSample(timeMs)); return this.samples.get(this.current); } - public Sample oldest() { - return this.samples.get((this.current + 1) % this.samples.size()); + public Sample oldest(long now) { + if (samples.size() == 0) + this.samples.add(newSample(now)); + Sample oldest = this.samples.get(0); + for (int i = 1; i < this.samples.size(); i++) { + Sample curr = this.samples.get(i); + if (curr.lastWindowMs < oldest.lastWindowMs) + oldest = curr; + } + return oldest; } - protected abstract void update(Sample sample, MetricConfig config, double value, long now); + protected abstract void update(Sample sample, MetricConfig config, double value, long timeMs); public abstract double combine(List samples, MetricConfig config, long 
now); - /* Timeout any windows that have expired in the absense of any events */ - protected void timeoutObsoleteSamples(MetricConfig config, long now) { + /* Timeout any windows that have expired in the absence of any events */ + protected void purgeObsoleteSamples(MetricConfig config, long now) { + long expireAge = config.samples() * config.timeWindowMs(); for (int i = 0; i < samples.size(); i++) { - int idx = (this.current + i) % samples.size(); - Sample sample = this.samples.get(idx); - if (now - sample.lastWindow >= (i + 1) * config.timeWindowNs()) + Sample sample = this.samples.get(i); + if (now - sample.lastWindowMs >= expireAge) sample.reset(now); } } @@ -103,24 +106,24 @@ protected void timeoutObsoleteSamples(MetricConfig config, long now) { protected static class Sample { public double initialValue; public long eventCount; - public long lastWindow; + public long lastWindowMs; public double value; public Sample(double initialValue, long now) { this.initialValue = initialValue; this.eventCount = 0; - this.lastWindow = now; + this.lastWindowMs = now; this.value = initialValue; } public void reset(long now) { this.eventCount = 0; - this.lastWindow = now; + this.lastWindowMs = now; this.value = initialValue; } - public boolean isComplete(long now, MetricConfig config) { - return now - lastWindow >= config.timeWindowNs() || eventCount >= config.eventWindow(); + public boolean isComplete(long timeMs, MetricConfig config) { + return timeMs - lastWindowMs >= config.timeWindowMs() || eventCount >= config.eventWindow(); } } diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Total.java b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Total.java index a9940ed7ea1db..67999a90ab9fe 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Total.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Total.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
*/ package org.apache.kafka.common.metrics.stats; @@ -35,7 +31,7 @@ public Total(double value) { } @Override - public void record(MetricConfig config, double value, long time) { + public void record(MetricConfig config, double value, long now) { this.total += value; } diff --git a/clients/src/main/java/org/apache/kafka/common/network/ByteBufferSend.java b/clients/src/main/java/org/apache/kafka/common/network/ByteBufferSend.java index a8e0413681cd1..c8213e156ec9c 100644 --- a/clients/src/main/java/org/apache/kafka/common/network/ByteBufferSend.java +++ b/clients/src/main/java/org/apache/kafka/common/network/ByteBufferSend.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.network; @@ -29,6 +25,7 @@ public class ByteBufferSend implements Send { private final int destination; protected final ByteBuffer[] buffers; private int remaining; + private int size; public ByteBufferSend(int destination, ByteBuffer... buffers) { super(); @@ -36,6 +33,7 @@ public ByteBufferSend(int destination, ByteBuffer... 
buffers) { this.buffers = buffers; for (int i = 0; i < buffers.length; i++) remaining += buffers[i].remaining(); + this.size = remaining; } @Override @@ -44,8 +42,8 @@ public int destination() { } @Override - public boolean complete() { - return remaining > 0; + public boolean completed() { + return remaining <= 0; } @Override @@ -58,6 +56,10 @@ public int remaining() { return this.remaining; } + public int size() { + return this.size; + } + @Override public long writeTo(GatheringByteChannel channel) throws IOException { long written = channel.write(buffers); diff --git a/clients/src/main/java/org/apache/kafka/common/network/NetworkReceive.java b/clients/src/main/java/org/apache/kafka/common/network/NetworkReceive.java index 51d4892dfc185..dcc639a4bb451 100644 --- a/clients/src/main/java/org/apache/kafka/common/network/NetworkReceive.java +++ b/clients/src/main/java/org/apache/kafka/common/network/NetworkReceive.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.network; diff --git a/clients/src/main/java/org/apache/kafka/common/network/Selectable.java b/clients/src/main/java/org/apache/kafka/common/network/Selectable.java index cef75d8cfd91d..b68bbf00ab8eb 100644 --- a/clients/src/main/java/org/apache/kafka/common/network/Selectable.java +++ b/clients/src/main/java/org/apache/kafka/common/network/Selectable.java @@ -60,23 +60,23 @@ public interface Selectable { public void poll(long timeout, List sends) throws IOException; /** - * The list of sends that completed on the last {@link #poll(long, List) poll()} call. + * The list of sends that completed on the last {@link #poll(long, List) poll()} call. */ public List completedSends(); /** - * The list of receives that completed on the last {@link #poll(long, List) poll()} call. + * The list of receives that completed on the last {@link #poll(long, List) poll()} call. 
*/ public List completedReceives(); /** - * The list of connections that finished disconnecting on the last {@link #poll(long, List) poll()} + * The list of connections that finished disconnecting on the last {@link #poll(long, List) poll()} * call. */ public List disconnected(); /** - * The list of connections that completed their connection on the last {@link #poll(long, List) poll()} + * The list of connections that completed their connection on the last {@link #poll(long, List) poll()} * call. */ public List connected(); diff --git a/clients/src/main/java/org/apache/kafka/common/network/Selector.java b/clients/src/main/java/org/apache/kafka/common/network/Selector.java index 8ed4c73146b2e..4dd2cdf773f7e 100644 --- a/clients/src/main/java/org/apache/kafka/common/network/Selector.java +++ b/clients/src/main/java/org/apache/kafka/common/network/Selector.java @@ -1,22 +1,19 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.network; import java.io.IOException; +import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.Socket; import java.nio.channels.CancelledKeyException; @@ -29,9 +26,20 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; import org.apache.kafka.common.KafkaException; - +import org.apache.kafka.common.metrics.Measurable; +import org.apache.kafka.common.metrics.MetricConfig; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.metrics.Sensor; +import org.apache.kafka.common.metrics.stats.Avg; +import org.apache.kafka.common.metrics.stats.Count; +import org.apache.kafka.common.metrics.stats.Max; +import org.apache.kafka.common.metrics.stats.Rate; +import org.apache.kafka.common.utils.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A selector interface for doing non-blocking multi-connection network I/O. 
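Since the constructor in the hunk below now takes a metrics registry and a clock, a brief sketch of how a caller might drive the selector; the node id, address, buffer sizes and the handle(...) callback are made-up values, and NetworkSend is assumed from the surrounding network package rather than shown in this diff:

    // inside a method that declares throws IOException
    Metrics metrics = new Metrics();
    Selector selector = new Selector(metrics, new SystemTime());
    selector.connect(0, new InetSocketAddress("localhost", 9092), 64 * 1024, 64 * 1024);
    selector.poll(500L, new ArrayList<NetworkSend>());       // nothing to send on this pass
    for (NetworkReceive receive : selector.completedReceives())
        handle(receive);                                     // hypothetical application callback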
@@ -63,27 +71,33 @@ */ public class Selector implements Selectable { + private static final Logger log = LoggerFactory.getLogger(Selector.class); + private final java.nio.channels.Selector selector; private final Map keys; private final List completedSends; private final List completedReceives; private final List disconnected; private final List connected; + private final Time time; + private final SelectorMetrics sensors; /** * Create a new selector */ - public Selector() { + public Selector(Metrics metrics, Time time) { try { this.selector = java.nio.channels.Selector.open(); } catch (IOException e) { throw new KafkaException(e); } + this.time = time; this.keys = new HashMap(); this.completedSends = new ArrayList(); this.completedReceives = new ArrayList(); this.connected = new ArrayList(); this.disconnected = new ArrayList(); + this.sensors = new SelectorMetrics(metrics); } /** @@ -97,10 +111,13 @@ public Selector() { * @param sendBufferSize The send buffer for the new connection * @param receiveBufferSize The receive buffer for the new connection * @throws IllegalStateException if there is already a connection for that id - * @throws UnresolvedAddressException if DNS resolution fails on the hostname + * @throws IOException if DNS resolution fails on the hostname or if the broker is down */ @Override public void connect(int id, InetSocketAddress address, int sendBufferSize, int receiveBufferSize) throws IOException { + if (this.keys.containsKey(id)) + throw new IllegalStateException("There is already a connection for id " + id); + SocketChannel channel = SocketChannel.open(); channel.configureBlocking(false); Socket socket = channel.socket(); @@ -111,13 +128,14 @@ public void connect(int id, InetSocketAddress address, int sendBufferSize, int r try { channel.connect(address); } catch (UnresolvedAddressException e) { + channel.close(); + throw new IOException("Can't resolve address: " + address, e); + } catch (IOException e) { channel.close(); throw e; } SelectionKey key = channel.register(this.selector, SelectionKey.OP_CONNECT); key.attach(new Transmissions(id)); - if (this.keys.containsKey(key)) - throw new IllegalStateException("There is already a connection for id " + id); this.keys.put(id, key); } @@ -145,17 +163,12 @@ public void wakeup() { */ @Override public void close() { - for (SelectionKey key : this.selector.keys()) { - try { - close(key); - } catch (IOException e) { - e.printStackTrace(); - } - } + for (SelectionKey key : this.selector.keys()) + close(key); try { this.selector.close(); } catch (IOException e) { - e.printStackTrace(); + log.error("Exception closing selector:", e); } } @@ -195,7 +208,11 @@ public void poll(long timeout, List sends) throws IOException { } /* check ready keys */ + long startSelect = time.nanoseconds(); int readyKeys = select(timeout); + long endSelect = time.nanoseconds(); + this.sensors.selectTime.record(endSelect - startSelect, time.milliseconds()); + if (readyKeys > 0) { Set keys = this.selector.selectedKeys(); Iterator iter = keys.iterator(); @@ -205,14 +222,17 @@ public void poll(long timeout, List sends) throws IOException { Transmissions transmissions = transmissions(key); SocketChannel channel = channel(key); + + // register all per-broker metrics at once + sensors.maybeRegisterNodeMetrics(transmissions.id); + try { - /* - * complete any connections that have finished their handshake - */ + /* complete any connections that have finished their handshake */ if (key.isConnectable()) { channel.finishConnect(); 
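// the handshake just finished: stop watching OP_CONNECT and start watching OP_READ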
key.interestOps(key.interestOps() & ~SelectionKey.OP_CONNECT | SelectionKey.OP_READ); this.connected.add(transmissions.id); + this.sensors.connectionCreated.record(); } /* read from any connections that have readable data */ @@ -223,17 +243,17 @@ public void poll(long timeout, List sends) throws IOException { if (transmissions.receive.complete()) { transmissions.receive.payload().rewind(); this.completedReceives.add(transmissions.receive); + this.sensors.recordBytesReceived(transmissions.id, transmissions.receive.payload().limit()); transmissions.clearReceive(); } } - /* - * write to any sockets that have space in their buffer and for which we have data - */ + /* write to any sockets that have space in their buffer and for which we have data */ if (key.isWritable()) { transmissions.send.writeTo(channel); if (transmissions.send.remaining() <= 0) { this.completedSends.add(transmissions.send); + this.sensors.recordBytesSent(transmissions.id, transmissions.send.size()); transmissions.clearSend(); key.interestOps(key.interestOps() & ~SelectionKey.OP_WRITE); } @@ -243,11 +263,17 @@ public void poll(long timeout, List sends) throws IOException { if (!key.isValid()) close(key); } catch (IOException e) { - e.printStackTrace(); + InetAddress remoteAddress = null; + Socket socket = channel.socket(); + if (socket != null) + remoteAddress = socket.getInetAddress(); + log.warn("Error in I/O with {}", remoteAddress , e); close(key); } } } + long endIo = time.nanoseconds(); + this.sensors.ioTime.record(endIo - endSelect, time.milliseconds()); } @Override @@ -299,15 +325,24 @@ else if (ms < 0L) /** * Begin closing this connection */ - private void close(SelectionKey key) throws IOException { + private void close(SelectionKey key) { SocketChannel channel = channel(key); Transmissions trans = transmissions(key); - if (trans != null) + if (trans != null) { this.disconnected.add(trans.id); + this.keys.remove(trans.id); + trans.clearReceive(); + trans.clearSend(); + } key.attach(null); key.cancel(); - channel.socket().close(); - channel.close(); + try { + channel.socket().close(); + channel.close(); + } catch (IOException e) { + log.error("Exception closing connection to node {}:", trans.id, e); + } + this.sensors.connectionClosed.record(); } /** @@ -363,4 +398,111 @@ public void clearReceive() { } } + private class SelectorMetrics { + private final Metrics metrics; + public final Sensor connectionClosed; + public final Sensor connectionCreated; + public final Sensor bytesTransferred; + public final Sensor bytesSent; + public final Sensor bytesReceived; + public final Sensor selectTime; + public final Sensor ioTime; + + public SelectorMetrics(Metrics metrics) { + this.metrics = metrics; + + this.connectionClosed = this.metrics.sensor("connections-closed"); + this.connectionClosed.add("connection-close-rate", "Connections closed per second in the window.", new Rate()); + + this.connectionCreated = this.metrics.sensor("connections-created"); + this.connectionCreated.add("connection-creation-rate", "New connections established per second in the window.", new Rate()); + + this.bytesTransferred = this.metrics.sensor("bytes-sent-received"); + bytesTransferred.add("network-io-rate", + "The average number of network operations (reads or writes) on all connections per second.", + new Rate(new Count())); + + this.bytesSent = this.metrics.sensor("bytes-sent", bytesTransferred); + this.bytesSent.add("outgoing-byte-rate", "The average number of outgoing bytes sent per second to all servers.", new Rate()); + 
this.bytesSent.add("request-rate", "The average number of requests sent per second.", new Rate(new Count())); + this.bytesSent.add("request-size-avg", "The average size of all requests in the window..", new Avg()); + this.bytesSent.add("request-size-max", "The maximum size of any request sent in the window.", new Max()); + + this.bytesReceived = this.metrics.sensor("bytes-received", bytesTransferred); + this.bytesReceived.add("incoming-byte-rate", "Bytes/second read off all sockets", new Rate()); + this.bytesReceived.add("response-rate", "Responses received sent per second.", new Rate(new Count())); + + this.selectTime = this.metrics.sensor("select-time"); + this.selectTime.add("select-rate", + "Number of times the I/O layer checked for new I/O to perform per second", + new Rate(new Count())); + this.selectTime.add("io-wait-time-ns-avg", + "The average length of time the I/O thread spent waiting for a socket ready for reads or writes in nanoseconds.", + new Avg()); + this.selectTime.add("io-wait-ratio", "The fraction of time the I/O thread spent waiting.", new Rate(TimeUnit.NANOSECONDS)); + + this.ioTime = this.metrics.sensor("io-time"); + this.ioTime.add("io-time-ns-avg", "The average length of time for I/O per select call in nanoseconds.", new Avg()); + this.ioTime.add("io-ratio", "The fraction of time the I/O thread spent doing I/O", new Rate(TimeUnit.NANOSECONDS)); + + this.metrics.addMetric("connection-count", "The current number of active connections.", new Measurable() { + public double measure(MetricConfig config, long now) { + return keys.size(); + } + }); + } + + public void maybeRegisterNodeMetrics(int node) { + if (node >= 0) { + // if one sensor of the metrics has been registered for the node, + // then all other sensors should have been registered; and vice versa + String nodeRequestName = "node-" + node + ".bytes-sent"; + Sensor nodeRequest = this.metrics.getSensor(nodeRequestName); + if (nodeRequest == null) { + nodeRequest = this.metrics.sensor(nodeRequestName); + nodeRequest.add("node-" + node + ".outgoing-byte-rate", new Rate()); + nodeRequest.add("node-" + node + ".request-rate", + "The average number of requests sent per second.", + new Rate(new Count())); + nodeRequest.add("node-" + node + ".request-size-avg", "The average size of all requests in the window..", new Avg()); + nodeRequest.add("node-" + node + ".request-size-max", "The maximum size of any request sent in the window.", new Max()); + + String nodeResponseName = "node-" + node + ".bytes-received"; + Sensor nodeResponse = this.metrics.sensor(nodeResponseName); + nodeResponse.add("node-" + node + ".incoming-byte-rate", new Rate()); + nodeResponse.add("node-" + node + ".response-rate", + "The average number of responses received per second.", + new Rate(new Count())); + + String nodeTimeName = "node-" + node + ".latency"; + Sensor nodeRequestTime = this.metrics.sensor(nodeTimeName); + nodeRequestTime.add("node-" + node + ".request-latency-avg", new Avg()); + nodeRequestTime.add("node-" + node + ".request-latency-max", new Max()); + } + } + } + + public void recordBytesSent(int node, int bytes) { + long now = time.milliseconds(); + this.bytesSent.record(bytes, now); + if (node >= 0) { + String nodeRequestName = "node-" + node + ".bytes-sent"; + Sensor nodeRequest = this.metrics.getSensor(nodeRequestName); + if (nodeRequest != null) + nodeRequest.record(bytes, now); + } + } + + public void recordBytesReceived(int node, int bytes) { + long now = time.milliseconds(); + this.bytesReceived.record(bytes, now); + if 
(node >= 0) { + String nodeRequestName = "node-" + node + ".bytes-received"; + Sensor nodeRequest = this.metrics.getSensor(nodeRequestName); + if (nodeRequest != null) + nodeRequest.record(bytes, now); + } + } + } + } diff --git a/clients/src/main/java/org/apache/kafka/common/network/Send.java b/clients/src/main/java/org/apache/kafka/common/network/Send.java index d62dff9433f50..5d321a09e4701 100644 --- a/clients/src/main/java/org/apache/kafka/common/network/Send.java +++ b/clients/src/main/java/org/apache/kafka/common/network/Send.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.network; @@ -38,7 +34,7 @@ public interface Send { /** * Is this send complete? 
*/ - public boolean complete(); + public boolean completed(); /** * An optional method to turn this send into an array of ByteBuffers if possible (otherwise returns null) diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/ApiKeys.java b/clients/src/main/java/org/apache/kafka/common/protocol/ApiKeys.java index 21a2592ea7c7f..109fc965e09b2 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/ApiKeys.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/ApiKeys.java @@ -16,6 +16,10 @@ */ package org.apache.kafka.common.protocol; + +import java.util.ArrayList; +import java.util.List; + /** * Identifiers for all the Kafka APIs */ @@ -26,15 +30,23 @@ public enum ApiKeys { METADATA(3, "metadata"), LEADER_AND_ISR(4, "leader_and_isr"), STOP_REPLICA(5, "stop_replica"), - OFFSET_COMMIT(6, "offset_commit"), - OFFSET_FETCH(7, "offset_fetch"); + OFFSET_COMMIT(8, "offset_commit"), + OFFSET_FETCH(9, "offset_fetch"), + CONSUMER_METADATA(10, "consumer_metadata"), + JOIN_GROUP(11, "join_group"), + HEARTBEAT(12, "heartbeat"); - public static int MAX_API_KEY = 0; + private static ApiKeys[] codeToType; + public static int MAX_API_KEY = -1; static { for (ApiKeys key : ApiKeys.values()) { MAX_API_KEY = Math.max(MAX_API_KEY, key.id); } + codeToType = new ApiKeys[MAX_API_KEY+1]; + for (ApiKeys key : ApiKeys.values()) { + codeToType[key.id] = key; + } } /** the perminant and immutable id of an API--this can't change ever */ @@ -48,4 +60,7 @@ private ApiKeys(int id, String name) { this.name = name; } + public static ApiKeys forId(int id) { + return codeToType[id]; + } } \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java b/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java index f88992a0cafd9..3316b6a109831 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java @@ -19,42 +19,34 @@ import java.util.HashMap; import java.util.Map; -import org.apache.kafka.common.errors.ApiException; -import org.apache.kafka.common.errors.CorruptRecordException; -import org.apache.kafka.common.errors.LeaderNotAvailableException; -import org.apache.kafka.common.errors.NetworkException; -import org.apache.kafka.common.errors.NotLeaderForPartitionException; -import org.apache.kafka.common.errors.OffsetMetadataTooLarge; -import org.apache.kafka.common.errors.OffsetOutOfRangeException; -import org.apache.kafka.common.errors.RecordTooLargeException; -import org.apache.kafka.common.errors.TimeoutException; -import org.apache.kafka.common.errors.UnknownServerException; -import org.apache.kafka.common.errors.UnknownTopicOrPartitionException; +import org.apache.kafka.common.errors.*; /** * This class contains all the client-server errors--those errors that must be sent from the server to the client. These * are thus part of the protocol. The names can be changed but the error code cannot. - * + * * Do not add exceptions that occur only on the client or only on the server here. 
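 *
 * As an illustrative sketch only (the lookup helpers are assumed here and are not shown in this hunk), a client
 * that receives an error_code over the wire would map it back to an exception roughly as follows:
 *
 *   short code = response.getShort("error_code");
 *   Errors error = Errors.forCode(code);   // assumed code-to-Errors lookup backed by the codeToError map below
 *   if (error != Errors.NONE)
 *       throw error.exception();           // assumed accessor for the ApiException carried by each constant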
*/ public enum Errors { UNKNOWN(-1, new UnknownServerException("The server experienced an unexpected error when processing the request")), NONE(0, null), - OFFSET_OUT_OF_RANGE(1, - new OffsetOutOfRangeException("The requested offset is not within the range of offsets maintained by the server.")), - CORRUPT_MESSAGE(2, - new CorruptRecordException("The message contents does not match the message CRC or the message is otherwise corrupt.")), + OFFSET_OUT_OF_RANGE(1, new OffsetOutOfRangeException("The requested offset is not within the range of offsets maintained by the server.")), + CORRUPT_MESSAGE(2, new CorruptRecordException("The message contents does not match the message CRC or the message is otherwise corrupt.")), UNKNOWN_TOPIC_OR_PARTITION(3, new UnknownTopicOrPartitionException("This server does not host this topic-partition.")), - LEADER_NOT_AVAILABLE(5, - new LeaderNotAvailableException("There is no leader for this topic-partition as we are in the middle of a leadership election.")), + // TODO: errorCode 4 for InvalidFetchSize + LEADER_NOT_AVAILABLE(5, new LeaderNotAvailableException("There is no leader for this topic-partition as we are in the middle of a leadership election.")), NOT_LEADER_FOR_PARTITION(6, new NotLeaderForPartitionException("This server is not the leader for that topic-partition.")), REQUEST_TIMED_OUT(7, new TimeoutException("The request timed out.")), - MESSAGE_TOO_LARGE(10, - new RecordTooLargeException("The request included a message larger than the max message size the server will accept.")), + // TODO: errorCode 8, 9, 11 + MESSAGE_TOO_LARGE(10, new RecordTooLargeException("The request included a message larger than the max message size the server will accept.")), OFFSET_METADATA_TOO_LARGE(12, new OffsetMetadataTooLarge("The metadata field of the offset request was too large.")), - NETWORK_EXCEPTION(13, new NetworkException("The server disconnected before a response was received.")); - + NETWORK_EXCEPTION(13, new NetworkException("The server disconnected before a response was received.")), + // TODO: errorCode 14, 15, 16 + INVALID_TOPIC_EXCEPTION(17, new InvalidTopicException("The request attempted to perform an operation on an invalid topic.")), + RECORD_LIST_TOO_LARGE(18, new RecordBatchTooLargeException("The request included message batch larger than the configured segment size on the server.")), + NOT_ENOUGH_REPLICAS(19, new NotEnoughReplicasException("Messages are rejected since there are fewer in-sync replicas than required.")), + NOT_ENOUGH_REPLICAS_AFTER_APPEND(20, new NotEnoughReplicasAfterAppendException("Messages are written to the log, but to fewer in-sync replicas than required.")); private static Map, Errors> classToError = new HashMap, Errors>(); private static Map codeToError = new HashMap(); static { diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/ProtoUtils.java b/clients/src/main/java/org/apache/kafka/common/protocol/ProtoUtils.java index 90df5d5971a3f..c2cbbbd7f1f28 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/ProtoUtils.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/ProtoUtils.java @@ -17,18 +17,10 @@ package org.apache.kafka.common.protocol; import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.kafka.common.Cluster; -import org.apache.kafka.common.Node; -import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.protocol.types.Schema; import 
org.apache.kafka.common.protocol.types.Struct; - public class ProtoUtils { private static Schema schemaFor(Schema[][] schemas, int apiKey, int version) { @@ -70,45 +62,4 @@ public static Struct parseResponse(int apiKey, ByteBuffer buffer) { return (Struct) currentResponseSchema(apiKey).read(buffer); } - public static Cluster parseMetadataResponse(Struct response) { - Map brokers = new HashMap(); - Object[] brokerStructs = (Object[]) response.get("brokers"); - for (int i = 0; i < brokerStructs.length; i++) { - Struct broker = (Struct) brokerStructs[i]; - int nodeId = (Integer) broker.get("node_id"); - String host = (String) broker.get("host"); - int port = (Integer) broker.get("port"); - brokers.put(nodeId, new Node(nodeId, host, port)); - } - List partitions = new ArrayList(); - Object[] topicInfos = (Object[]) response.get("topic_metadata"); - for (int i = 0; i < topicInfos.length; i++) { - Struct topicInfo = (Struct) topicInfos[i]; - short topicError = topicInfo.getShort("topic_error_code"); - if (topicError == Errors.NONE.code()) { - String topic = topicInfo.getString("topic"); - Object[] partitionInfos = (Object[]) topicInfo.get("partition_metadata"); - for (int j = 0; j < partitionInfos.length; j++) { - Struct partitionInfo = (Struct) partitionInfos[j]; - short partError = partitionInfo.getShort("partition_error_code"); - if (partError == Errors.NONE.code()) { - int partition = partitionInfo.getInt("partition_id"); - int leader = partitionInfo.getInt("leader"); - Node leaderNode = leader == -1 ? null : brokers.get(leader); - Object[] replicas = (Object[]) partitionInfo.get("replicas"); - Node[] replicaNodes = new Node[replicas.length]; - for (int k = 0; k < replicas.length; k++) - replicaNodes[k] = brokers.get(replicas[k]); - Object[] isr = (Object[]) partitionInfo.get("isr"); - Node[] isrNodes = new Node[isr.length]; - for (int k = 0; k < isr.length; k++) - isrNodes[k] = brokers.get(isr[k]); - partitions.add(new PartitionInfo(topic, partition, leaderNode, replicaNodes, isrNodes)); - } - } - } - } - return new Cluster(brokers.values(), partitions); - } - } diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java b/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java index 044b03061802e..7517b879866fc 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java @@ -104,6 +104,264 @@ public class Protocol { public static Schema[] PRODUCE_REQUEST = new Schema[] { PRODUCE_REQUEST_V0 }; public static Schema[] PRODUCE_RESPONSE = new Schema[] { PRODUCE_RESPONSE_V0 }; + /* Offset commit api */ + public static Schema OFFSET_COMMIT_REQUEST_PARTITION_V0 = new Schema(new Field("partition", + INT32, + "Topic partition id."), + new Field("offset", + INT64, + "Message offset to be committed."), + new Field("timestamp", + INT64, + "Timestamp of the commit"), + new Field("metadata", + STRING, + "Any associated metadata the client wants to keep.")); + + public static Schema OFFSET_COMMIT_REQUEST_TOPIC_V0 = new Schema(new Field("topic", + STRING, + "Topic to commit."), + new Field("partitions", + new ArrayOf(OFFSET_COMMIT_REQUEST_PARTITION_V0), + "Partitions to commit offsets.")); + + public static Schema OFFSET_COMMIT_REQUEST_V0 = new Schema(new Field("group_id", + STRING, + "The consumer group id."), + new Field("topics", + new ArrayOf(OFFSET_COMMIT_REQUEST_TOPIC_V0), + "Topics to commit offsets.")); + + public static Schema OFFSET_COMMIT_REQUEST_V1 = new Schema(new 
Field("group_id", + STRING, + "The consumer group id."), + new Field("group_generation_id", + INT32, + "The generation of the consumer group."), + new Field("consumer_id", + STRING, + "The consumer id assigned by the group coordinator."), + new Field("topics", + new ArrayOf(OFFSET_COMMIT_REQUEST_TOPIC_V0), + "Topics to commit offsets.")); + + public static Schema OFFSET_COMMIT_RESPONSE_PARTITION_V0 = new Schema(new Field("partition", + INT32, + "Topic partition id."), + new Field("error_code", + INT16)); + + public static Schema OFFSET_COMMIT_RESPONSE_TOPIC_V0 = new Schema(new Field("topic", STRING), + new Field("partition_responses", + new ArrayOf(OFFSET_COMMIT_RESPONSE_PARTITION_V0))); + + public static Schema OFFSET_COMMIT_RESPONSE_V0 = new Schema(new Field("responses", + new ArrayOf(OFFSET_COMMIT_RESPONSE_TOPIC_V0))); + + public static Schema[] OFFSET_COMMIT_REQUEST = new Schema[] { OFFSET_COMMIT_REQUEST_V0, OFFSET_COMMIT_REQUEST_V1 }; + /* The response types for both V0 and V1 of OFFSET_COMMIT_REQUEST are the same. */ + public static Schema[] OFFSET_COMMIT_RESPONSE = new Schema[] { OFFSET_COMMIT_RESPONSE_V0, OFFSET_COMMIT_RESPONSE_V0}; + + /* Offset fetch api */ + public static Schema OFFSET_FETCH_REQUEST_PARTITION_V0 = new Schema(new Field("partition", + INT32, + "Topic partition id.")); + + public static Schema OFFSET_FETCH_REQUEST_TOPIC_V0 = new Schema(new Field("topic", + STRING, + "Topic to fetch offset."), + new Field("partitions", + new ArrayOf(OFFSET_FETCH_REQUEST_PARTITION_V0), + "Partitions to fetch offsets.")); + + public static Schema OFFSET_FETCH_REQUEST_V0 = new Schema(new Field("group_id", + STRING, + "The consumer group id."), + new Field("topics", + new ArrayOf(OFFSET_FETCH_REQUEST_TOPIC_V0), + "Topics to fetch offsets.")); + + public static Schema OFFSET_FETCH_RESPONSE_PARTITION_V0 = new Schema(new Field("partition", + INT32, + "Topic partition id."), + new Field("offset", + INT64, + "Last committed message offset."), + new Field("metadata", + STRING, + "Any associated metadata the client wants to keep."), + new Field("error_code", + INT16)); + + public static Schema OFFSET_FETCH_RESPONSE_TOPIC_V0 = new Schema(new Field("topic", STRING), + new Field("partition_responses", + new ArrayOf(OFFSET_FETCH_RESPONSE_PARTITION_V0))); + + public static Schema OFFSET_FETCH_RESPONSE_V0 = new Schema(new Field("responses", + new ArrayOf(OFFSET_FETCH_RESPONSE_TOPIC_V0))); + + public static Schema[] OFFSET_FETCH_REQUEST = new Schema[] { OFFSET_FETCH_REQUEST_V0 }; + public static Schema[] OFFSET_FETCH_RESPONSE = new Schema[] { OFFSET_FETCH_RESPONSE_V0 }; + + /* List offset api */ + public static Schema LIST_OFFSET_REQUEST_PARTITION_V0 = new Schema(new Field("partition", + INT32, + "Topic partition id."), + new Field("timestamp", + INT64, + "Timestamp."), + new Field("max_num_offsets", + INT32, + "Maximum offsets to return.")); + + public static Schema LIST_OFFSET_REQUEST_TOPIC_V0 = new Schema(new Field("topic", + STRING, + "Topic to list offset."), + new Field("partitions", + new ArrayOf(LIST_OFFSET_REQUEST_PARTITION_V0), + "Partitions to list offset.")); + + public static Schema LIST_OFFSET_REQUEST_V0 = new Schema(new Field("replica_id", + INT32, + "Broker id of the follower. 
For normal consumers, use -1."), + new Field("topics", + new ArrayOf(LIST_OFFSET_REQUEST_TOPIC_V0), + "Topics to list offsets.")); + + public static Schema LIST_OFFSET_RESPONSE_PARTITION_V0 = new Schema(new Field("partition", + INT32, + "Topic partition id."), + new Field("error_code", + INT16), + new Field("offsets", + new ArrayOf(INT64), + "A list of offsets.")); + + public static Schema LIST_OFFSET_RESPONSE_TOPIC_V0 = new Schema(new Field("topic", STRING), + new Field("partition_responses", + new ArrayOf(LIST_OFFSET_RESPONSE_PARTITION_V0))); + + public static Schema LIST_OFFSET_RESPONSE_V0 = new Schema(new Field("responses", + new ArrayOf(LIST_OFFSET_RESPONSE_TOPIC_V0))); + + public static Schema[] LIST_OFFSET_REQUEST = new Schema[] { LIST_OFFSET_REQUEST_V0 }; + public static Schema[] LIST_OFFSET_RESPONSE = new Schema[] { LIST_OFFSET_RESPONSE_V0 }; + + /* Fetch api */ + public static Schema FETCH_REQUEST_PARTITION_V0 = new Schema(new Field("partition", + INT32, + "Topic partition id."), + new Field("fetch_offset", + INT64, + "Message offset."), + new Field("max_bytes", + INT32, + "Maximum bytes to fetch.")); + + public static Schema FETCH_REQUEST_TOPIC_V0 = new Schema(new Field("topic", + STRING, + "Topic to fetch."), + new Field("partitions", + new ArrayOf(FETCH_REQUEST_PARTITION_V0), + "Partitions to fetch.")); + + public static Schema FETCH_REQUEST_V0 = new Schema(new Field("replica_id", + INT32, + "Broker id of the follower. For normal consumers, use -1."), + new Field("max_wait_time", + INT32, + "Maximum time in ms to wait for the response."), + new Field("min_bytes", + INT32, + "Minimum bytes to accumulate in the response."), + new Field("topics", + new ArrayOf(FETCH_REQUEST_TOPIC_V0), + "Topics to fetch.")); + + public static Schema FETCH_RESPONSE_PARTITION_V0 = new Schema(new Field("partition", + INT32, + "Topic partition id."), + new Field("error_code", + INT16), + new Field("high_watermark", + INT64, + "Last committed offset."), + new Field("record_set", BYTES)); + + public static Schema FETCH_RESPONSE_TOPIC_V0 = new Schema(new Field("topic", STRING), + new Field("partition_responses", + new ArrayOf(FETCH_RESPONSE_PARTITION_V0))); + + public static Schema FETCH_RESPONSE_V0 = new Schema(new Field("responses", + new ArrayOf(FETCH_RESPONSE_TOPIC_V0))); + + public static Schema[] FETCH_REQUEST = new Schema[] { FETCH_REQUEST_V0 }; + public static Schema[] FETCH_RESPONSE = new Schema[] { FETCH_RESPONSE_V0 }; + + /* Consumer metadata api */ + public static Schema CONSUMER_METADATA_REQUEST_V0 = new Schema(new Field("group_id", + STRING, + "The consumer group id.")); + + public static Schema CONSUMER_METADATA_RESPONSE_V0 = new Schema(new Field("error_code", + INT16), + new Field("coordinator", + BROKER, + "Host and port information for the coordinator for a consumer group.")); + + public static Schema[] CONSUMER_METADATA_REQUEST = new Schema[] { CONSUMER_METADATA_REQUEST_V0 }; + public static Schema[] CONSUMER_METADATA_RESPONSE = new Schema[] { CONSUMER_METADATA_RESPONSE_V0 }; + + /* Join group api */ + public static Schema JOIN_GROUP_REQUEST_V0 = new Schema(new Field("group_id", + STRING, + "The consumer group id."), + new Field("session_timeout", + INT32, + "The coordinator considers the consumer dead if it receives no heartbeat after this timeout in ms."), + new Field("topics", + new ArrayOf(STRING), + "An array of topics to subscribe to."), + new Field("consumer_id", + STRING, + "The assigned consumer id or an empty string for a new consumer."), + new 
Field("partition_assignment_strategy", + STRING, + "The strategy for the coordinator to assign partitions.")); + + public static Schema JOIN_GROUP_RESPONSE_TOPIC_V0 = new Schema(new Field("topic", STRING), + new Field("partitions", new ArrayOf(INT32))); + public static Schema JOIN_GROUP_RESPONSE_V0 = new Schema(new Field("error_code", + INT16), + new Field("group_generation_id", + INT32, + "The generation of the consumer group."), + new Field("consumer_id", + STRING, + "The consumer id assigned by the group coordinator."), + new Field("assigned_partitions", + new ArrayOf(JOIN_GROUP_RESPONSE_TOPIC_V0))); + + public static Schema[] JOIN_GROUP_REQUEST = new Schema[] { JOIN_GROUP_REQUEST_V0 }; + public static Schema[] JOIN_GROUP_RESPONSE = new Schema[] { JOIN_GROUP_RESPONSE_V0 }; + + /* Heartbeat api */ + public static Schema HEARTBEAT_REQUEST_V0 = new Schema(new Field("group_id", + STRING, + "The consumer group id."), + new Field("group_generation_id", + INT32, + "The generation of the consumer group."), + new Field("consumer_id", + STRING, + "The consumer id assigned by the group coordinator.")); + + public static Schema HEARTBEAT_RESPONSE_V0 = new Schema(new Field("error_code", + INT16)); + + public static Schema[] HEARTBEAT_REQUEST = new Schema[] {HEARTBEAT_REQUEST_V0}; + public static Schema[] HEARTBEAT_RESPONSE = new Schema[] {HEARTBEAT_RESPONSE_V0}; + /* an array of all requests and responses with all schema versions */ public static Schema[][] REQUESTS = new Schema[ApiKeys.MAX_API_KEY + 1][]; public static Schema[][] RESPONSES = new Schema[ApiKeys.MAX_API_KEY + 1][]; @@ -113,22 +371,28 @@ public class Protocol { static { REQUESTS[ApiKeys.PRODUCE.id] = PRODUCE_REQUEST; - REQUESTS[ApiKeys.FETCH.id] = new Schema[] {}; - REQUESTS[ApiKeys.LIST_OFFSETS.id] = new Schema[] {}; + REQUESTS[ApiKeys.FETCH.id] = FETCH_REQUEST; + REQUESTS[ApiKeys.LIST_OFFSETS.id] = LIST_OFFSET_REQUEST; REQUESTS[ApiKeys.METADATA.id] = METADATA_REQUEST; REQUESTS[ApiKeys.LEADER_AND_ISR.id] = new Schema[] {}; REQUESTS[ApiKeys.STOP_REPLICA.id] = new Schema[] {}; - REQUESTS[ApiKeys.OFFSET_COMMIT.id] = new Schema[] {}; - REQUESTS[ApiKeys.OFFSET_FETCH.id] = new Schema[] {}; + REQUESTS[ApiKeys.OFFSET_COMMIT.id] = OFFSET_COMMIT_REQUEST; + REQUESTS[ApiKeys.OFFSET_FETCH.id] = OFFSET_FETCH_REQUEST; + REQUESTS[ApiKeys.CONSUMER_METADATA.id] = CONSUMER_METADATA_REQUEST; + REQUESTS[ApiKeys.JOIN_GROUP.id] = JOIN_GROUP_REQUEST; + REQUESTS[ApiKeys.HEARTBEAT.id] = HEARTBEAT_REQUEST; RESPONSES[ApiKeys.PRODUCE.id] = PRODUCE_RESPONSE; - RESPONSES[ApiKeys.FETCH.id] = new Schema[] {}; - RESPONSES[ApiKeys.LIST_OFFSETS.id] = new Schema[] {}; + RESPONSES[ApiKeys.FETCH.id] = FETCH_RESPONSE; + RESPONSES[ApiKeys.LIST_OFFSETS.id] = LIST_OFFSET_RESPONSE; RESPONSES[ApiKeys.METADATA.id] = METADATA_RESPONSE; RESPONSES[ApiKeys.LEADER_AND_ISR.id] = new Schema[] {}; RESPONSES[ApiKeys.STOP_REPLICA.id] = new Schema[] {}; - RESPONSES[ApiKeys.OFFSET_COMMIT.id] = new Schema[] {}; - RESPONSES[ApiKeys.OFFSET_FETCH.id] = new Schema[] {}; + RESPONSES[ApiKeys.OFFSET_COMMIT.id] = OFFSET_COMMIT_RESPONSE; + RESPONSES[ApiKeys.OFFSET_FETCH.id] = OFFSET_FETCH_RESPONSE; + RESPONSES[ApiKeys.CONSUMER_METADATA.id] = CONSUMER_METADATA_RESPONSE; + RESPONSES[ApiKeys.JOIN_GROUP.id] = JOIN_GROUP_RESPONSE; + RESPONSES[ApiKeys.HEARTBEAT.id] = HEARTBEAT_RESPONSE; /* set the maximum version of each api */ for (ApiKeys api : ApiKeys.values()) diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/types/Schema.java 
b/clients/src/main/java/org/apache/kafka/common/protocol/types/Schema.java index c10d9ef28db01..7164701258666 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/types/Schema.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/types/Schema.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.protocol.types; @@ -51,8 +47,9 @@ public void write(ByteBuffer buffer, Object o) { Object value = f.type().validate(r.get(f)); f.type.write(buffer, value); } catch (Exception e) { - throw new SchemaException("Error writing field '" + f.name + "': " + e.getMessage() == null ? e.getMessage() : e.getClass() - .getName()); + throw new SchemaException("Error writing field '" + f.name + + "': " + + (e.getMessage() == null ? e.getClass().getName() : e.getMessage())); } } } @@ -62,8 +59,15 @@ public void write(ByteBuffer buffer, Object o) { */ public Object read(ByteBuffer buffer) { Object[] objects = new Object[fields.length]; - for (int i = 0; i < fields.length; i++) - objects[i] = fields[i].type.read(buffer); + for (int i = 0; i < fields.length; i++) { + try { + objects[i] = fields[i].type.read(buffer); + } catch (Exception e) { + throw new SchemaException("Error reading field '" + fields[i].name + + "': " + + (e.getMessage() == null ? e.getClass().getName() : e.getMessage())); + } + } return new Struct(this, objects); } diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/types/Struct.java b/clients/src/main/java/org/apache/kafka/common/protocol/types/Struct.java index 73ad6cdb465ed..121e880a941fc 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/types/Struct.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/types/Struct.java @@ -83,6 +83,15 @@ public Object get(String name) { return getFieldOrDefault(field); } + /** + * Check if the struct contains a field. + * @param name + * @return Whether a field exists. 
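 *
 * Illustrative usage only (the field name is just an example; availability depends on the schema in use):
 *
 *   if (struct.hasField("group_generation_id"))              // present in OFFSET_COMMIT_REQUEST_V1 but not in V0
 *       generationId = struct.getInt("group_generation_id");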
+ */ + public boolean hasField(String name) { + return schema.get(name) != null; + } + public Struct getStruct(Field field) { return (Struct) get(field); } @@ -107,6 +116,22 @@ public Integer getInt(String name) { return (Integer) get(name); } + public Long getLong(Field field) { + return (Long) get(field); + } + + public Long getLong(String name) { + return (Long) get(name); + } + + public ByteBuffer getBytes(Field field) { + return (ByteBuffer) get(field); + } + + public ByteBuffer getBytes(String name) { + return (ByteBuffer) get(name); + } + public Object[] getArray(Field field) { return (Object[]) get(field); } @@ -150,7 +175,9 @@ public Struct set(String name, Object value) { } /** - * Create a struct for the schema of a container type (struct or array) + * Create a struct for the schema of a container type (struct or array). + * Note that for array type, this method assumes that the type is an array of schema and creates a struct + * of that schema. Arrays of other types can't be instantiated with this method. * * @param field The field to create an instance of * @return The struct @@ -230,9 +257,20 @@ public String toString() { StringBuilder b = new StringBuilder(); b.append('{'); for (int i = 0; i < this.values.length; i++) { - b.append(this.schema.get(i).name); + Field f = this.schema.get(i); + b.append(f.name); b.append('='); - b.append(this.values[i]); + if (f.type() instanceof ArrayOf) { + Object[] arrayValue = (Object[]) this.values[i]; + b.append('['); + for (int j = 0; j < arrayValue.length; j++) { + b.append(arrayValue[j]); + if (j < arrayValue.length - 1) + b.append(','); + } + b.append(']'); + } else + b.append(this.values[i]); if (i < this.values.length - 1) b.append(','); } @@ -240,4 +278,46 @@ public String toString() { return b.toString(); } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + for (int i = 0; i < this.values.length; i++) { + Field f = this.schema.get(i); + if (f.type() instanceof ArrayOf) { + Object[] arrayObject = (Object []) this.get(f); + for (Object arrayItem: arrayObject) + result = prime * result + arrayItem.hashCode(); + } else { + result = prime * result + this.get(f).hashCode(); + } + } + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Struct other = (Struct) obj; + if (schema != other.schema) + return false; + for (int i = 0; i < this.values.length; i++) { + Field f = this.schema.get(i); + Boolean result; + if (f.type() instanceof ArrayOf) { + result = Arrays.equals((Object []) this.get(f), (Object []) other.get(f)); + } else { + result = this.get(f).equals(other.get(f)); + } + if (!result) + return false; + } + return true; + } + } diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/types/Type.java b/clients/src/main/java/org/apache/kafka/common/protocol/types/Type.java index 1bd8ce4dfb7c5..f0d5a8286380d 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/types/Type.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/types/Type.java @@ -20,7 +20,6 @@ import org.apache.kafka.common.utils.Utils; - /** * A serializable type */ diff --git a/clients/src/main/java/org/apache/kafka/common/record/ByteBufferInputStream.java b/clients/src/main/java/org/apache/kafka/common/record/ByteBufferInputStream.java new file mode 100644 index 0000000000000..12651d46e7887 --- /dev/null +++ 
b/clients/src/main/java/org/apache/kafka/common/record/ByteBufferInputStream.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.common.record; + +import java.io.InputStream; +import java.nio.ByteBuffer; + +/** + * A byte buffer backed input outputStream + */ +public class ByteBufferInputStream extends InputStream { + + private ByteBuffer buffer; + + public ByteBufferInputStream(ByteBuffer buffer) { + this.buffer = buffer; + } + + public int read() { + if (!buffer.hasRemaining()) { + return -1; + } + return buffer.get() & 0xFF; + } + + public int read(byte[] bytes, int off, int len) { + if (!buffer.hasRemaining()) { + return -1; + } + + len = Math.min(len, buffer.remaining()); + buffer.get(bytes, off, len); + return len; + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/record/ByteBufferOutputStream.java b/clients/src/main/java/org/apache/kafka/common/record/ByteBufferOutputStream.java new file mode 100644 index 0000000000000..c7bd2f8852bd9 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/record/ByteBufferOutputStream.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.common.record; + +import java.io.OutputStream; +import java.nio.ByteBuffer; + +/** + * A byte buffer backed output outputStream + */ +public class ByteBufferOutputStream extends OutputStream { + + private static float REALLOCATION_FACTOR = 1.1f; + + private ByteBuffer buffer; + + public ByteBufferOutputStream(ByteBuffer buffer) { + this.buffer = buffer; + } + + public void write(int b) { + if (buffer.remaining() < 1) + expandBuffer(buffer.capacity() + 1); + buffer.put((byte) b); + } + + public void write(byte[] bytes, int off, int len) { + if (buffer.remaining() < len) + expandBuffer(buffer.capacity() + len); + buffer.put(bytes, off, len); + } + + public ByteBuffer buffer() { + return buffer; + } + + private void expandBuffer(int size) { + int expandSize = Math.max((int) (buffer.capacity() * REALLOCATION_FACTOR), size); + ByteBuffer temp = ByteBuffer.allocate(expandSize); + temp.put(buffer.array(), buffer.arrayOffset(), buffer.position()); + buffer = temp; + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/record/CompressionType.java b/clients/src/main/java/org/apache/kafka/common/record/CompressionType.java index 906da02d02c03..65a7e4323793d 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/CompressionType.java +++ b/clients/src/main/java/org/apache/kafka/common/record/CompressionType.java @@ -20,14 +20,16 @@ * The compression type to use */ public enum CompressionType { - NONE(0, "none"), GZIP(1, "gzip"), SNAPPY(2, "snappy"); + NONE(0, "none", 1.0f), GZIP(1, "gzip", 0.5f), SNAPPY(2, "snappy", 0.5f), LZ4(3, "lz4", 0.5f); public final int id; public final String name; + public final float rate; - private CompressionType(int id, String name) { + private CompressionType(int id, String name, float rate) { this.id = id; this.name = name; + this.rate = rate; } public static CompressionType forId(int id) { @@ -38,6 +40,8 @@ public static CompressionType forId(int id) { return GZIP; case 2: return SNAPPY; + case 3: + return LZ4; default: throw new IllegalArgumentException("Unknown compression type id: " + id); } @@ -50,7 +54,10 @@ else if (GZIP.name.equals(name)) return GZIP; else if (SNAPPY.name.equals(name)) return SNAPPY; + else if (LZ4.name.equals(name)) + return LZ4; else throw new IllegalArgumentException("Unknown compression name: " + name); } + } diff --git a/clients/src/main/java/org/apache/kafka/common/record/Compressor.java b/clients/src/main/java/org/apache/kafka/common/record/Compressor.java new file mode 100644 index 0000000000000..d684e6833bd81 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/record/Compressor.java @@ -0,0 +1,271 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.common.record; + +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.utils.Utils; + +import java.io.InputStream; +import java.io.OutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +public class Compressor { + + static private final float COMPRESSION_RATE_DAMPING_FACTOR = 0.9f; + static private final float COMPRESSION_RATE_ESTIMATION_FACTOR = 1.05f; + static private final int COMPRESSION_DEFAULT_BUFFER_SIZE = 1024; + + private static float[] typeToRate; + private static int MAX_TYPE_ID = -1; + + static { + for (CompressionType type : CompressionType.values()) { + MAX_TYPE_ID = Math.max(MAX_TYPE_ID, type.id); + } + typeToRate = new float[MAX_TYPE_ID+1]; + for (CompressionType type : CompressionType.values()) { + typeToRate[type.id] = type.rate; + } + } + + private final CompressionType type; + private final DataOutputStream appendStream; + private final ByteBufferOutputStream bufferStream; + private final int initPos; + + public long writtenUncompressed; + public long numRecords; + + public Compressor(ByteBuffer buffer, CompressionType type, int blockSize) { + this.type = type; + this.initPos = buffer.position(); + + this.numRecords = 0; + this.writtenUncompressed = 0; + + if (type != CompressionType.NONE) { + // for compressed records, leave space for the header and the shallow message metadata + // and move the starting position to the value payload offset + buffer.position(initPos + Records.LOG_OVERHEAD + Record.RECORD_OVERHEAD); + } + + // create the stream + bufferStream = new ByteBufferOutputStream(buffer); + appendStream = wrapForOutput(bufferStream, type, blockSize); + } + + public Compressor(ByteBuffer buffer, CompressionType type) { + this(buffer, type, COMPRESSION_DEFAULT_BUFFER_SIZE); + } + + public ByteBuffer buffer() { + return bufferStream.buffer(); + } + + public double compressionRate() { + ByteBuffer buffer = bufferStream.buffer(); + if (this.writtenUncompressed == 0) + return 1.0; + else + return (double) buffer.position() / this.writtenUncompressed; + } + + public void close() { + try { + appendStream.close(); + } catch (IOException e) { + throw new KafkaException(e); + } + + if (type != CompressionType.NONE) { + ByteBuffer buffer = bufferStream.buffer(); + int pos = buffer.position(); + // write the header, for the end offset write as number of records - 1 + buffer.position(initPos); + buffer.putLong(numRecords - 1); + buffer.putInt(pos - initPos - Records.LOG_OVERHEAD); + // write the shallow message (the crc and value size are not correct yet) + Record.write(buffer, null, null, type, 0, -1); + // compute the fill the value size + int valueSize = pos - initPos - Records.LOG_OVERHEAD - Record.RECORD_OVERHEAD; + buffer.putInt(initPos + Records.LOG_OVERHEAD + Record.KEY_OFFSET, valueSize); + // compute and fill the crc at the beginning of the message + long crc = Record.computeChecksum(buffer, + initPos + Records.LOG_OVERHEAD + Record.MAGIC_OFFSET, + pos - initPos - Records.LOG_OVERHEAD - Record.MAGIC_OFFSET); + Utils.writeUnsignedInt(buffer, initPos + Records.LOG_OVERHEAD + Record.CRC_OFFSET, crc); + // reset the position + buffer.position(pos); + + // update the compression ratio + float compressionRate = (float) buffer.position() / this.writtenUncompressed; + typeToRate[type.id] = typeToRate[type.id] * COMPRESSION_RATE_DAMPING_FACTOR + + 
compressionRate * (1 - COMPRESSION_RATE_DAMPING_FACTOR); + } + } + + // Note that for all the write operations below, IO exceptions should + // never be thrown since the underlying ByteBufferOutputStream does not throw IOException; + // therefore upon encountering this issue we just close the append stream. + + public void putLong(final long value) { + try { + appendStream.writeLong(value); + } catch (IOException e) { + throw new KafkaException("I/O exception when writing to the append stream, closing", e); + } + } + + public void putInt(final int value) { + try { + appendStream.writeInt(value); + } catch (IOException e) { + throw new KafkaException("I/O exception when writing to the append stream, closing", e); + } + } + + public void put(final ByteBuffer buffer) { + try { + appendStream.write(buffer.array(), buffer.arrayOffset(), buffer.limit()); + } catch (IOException e) { + throw new KafkaException("I/O exception when writing to the append stream, closing", e); + } + } + + public void putByte(final byte value) { + try { + appendStream.write(value); + } catch (IOException e) { + throw new KafkaException("I/O exception when writing to the append stream, closing", e); + } + } + + public void put(final byte[] bytes, final int offset, final int len) { + try { + appendStream.write(bytes, offset, len); + } catch (IOException e) { + throw new KafkaException("I/O exception when writing to the append stream, closing", e); + } + } + + public void putRecord(byte[] key, byte[] value, CompressionType type, int valueOffset, int valueSize) { + // put a record as un-compressed into the underlying stream + long crc = Record.computeChecksum(key, value, type, valueOffset, valueSize); + byte attributes = Record.computeAttributes(type); + putRecord(crc, attributes, key, value, valueOffset, valueSize); + } + + public void putRecord(byte[] key, byte[] value) { + putRecord(key, value, CompressionType.NONE, 0, -1); + } + + private void putRecord(final long crc, final byte attributes, final byte[] key, final byte[] value, final int valueOffset, final int valueSize) { + Record.write(this, crc, attributes, key, value, valueOffset, valueSize); + } + + public void recordWritten(int size) { + numRecords += 1; + writtenUncompressed += size; + } + + public long estimatedBytesWritten() { + if (type == CompressionType.NONE) { + return bufferStream.buffer().position(); + } else { + // estimate the written bytes to the underlying byte buffer based on uncompressed written bytes + return (long) (writtenUncompressed * typeToRate[type.id] * COMPRESSION_RATE_ESTIMATION_FACTOR); + } + } + + // the following two functions also need to be public since they are used in MemoryRecords.iteration + + static public DataOutputStream wrapForOutput(ByteBufferOutputStream buffer, CompressionType type, int bufferSize) { + try { + switch (type) { + case NONE: + return new DataOutputStream(buffer); + case GZIP: + return new DataOutputStream(new GZIPOutputStream(buffer, bufferSize)); + case SNAPPY: + // dynamically load the snappy class to avoid runtime dependency + // on snappy if we are not using it + try { + Class SnappyOutputStream = Class.forName("org.xerial.snappy.SnappyOutputStream"); + OutputStream stream = (OutputStream) SnappyOutputStream.getConstructor(OutputStream.class, Integer.TYPE) + .newInstance(buffer, bufferSize); + return new DataOutputStream(stream); + } catch (Exception e) { + throw new KafkaException(e); + } + case LZ4: + try { + Class outputStreamClass = 
Class.forName("org.apache.kafka.common.message.KafkaLZ4BlockOutputStream"); + OutputStream stream = (OutputStream) outputStreamClass.getConstructor(OutputStream.class) + .newInstance(buffer); + return new DataOutputStream(stream); + } catch (Exception e) { + throw new KafkaException(e); + } + default: + throw new IllegalArgumentException("Unknown compression type: " + type); + } + } catch (IOException e) { + throw new KafkaException(e); + } + } + + static public DataInputStream wrapForInput(ByteBufferInputStream buffer, CompressionType type) { + try { + switch (type) { + case NONE: + return new DataInputStream(buffer); + case GZIP: + return new DataInputStream(new GZIPInputStream(buffer)); + case SNAPPY: + // dynamically load the snappy class to avoid runtime dependency + // on snappy if we are not using it + try { + Class SnappyInputStream = Class.forName("org.xerial.snappy.SnappyInputStream"); + InputStream stream = (InputStream) SnappyInputStream.getConstructor(InputStream.class) + .newInstance(buffer); + return new DataInputStream(stream); + } catch (Exception e) { + throw new KafkaException(e); + } + case LZ4: + // dynamically load LZ4 class to avoid runtime dependency + try { + Class inputStreamClass = Class.forName("org.apache.kafka.common.message.KafkaLZ4BlockInputStream"); + InputStream stream = (InputStream) inputStreamClass.getConstructor(InputStream.class) + .newInstance(buffer); + return new DataInputStream(stream); + } catch (Exception e) { + throw new KafkaException(e); + } + default: + throw new IllegalArgumentException("Unknown compression type: " + type); + } + } catch (IOException e) { + throw new KafkaException(e); + } + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java b/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java index 9d8935fa3beeb..040e5b91005ed 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java +++ b/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java @@ -1,68 +1,124 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.record; +import java.io.DataInputStream; +import java.io.EOFException; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.GatheringByteChannel; import java.util.Iterator; +import org.apache.kafka.common.KafkaException; import org.apache.kafka.common.utils.AbstractIterator; - /** * A {@link Records} implementation backed by a ByteBuffer. */ public class MemoryRecords implements Records { - private final ByteBuffer buffer; + private final Compressor compressor; + private final int capacity; + private final int sizeLimit; + private ByteBuffer buffer; + private boolean writable; + + // Construct a writable memory records + private MemoryRecords(ByteBuffer buffer, CompressionType type, boolean writable, int sizeLimit) { + this.writable = writable; + this.capacity = buffer.capacity(); + this.sizeLimit = sizeLimit; + if (this.writable) { + this.buffer = null; + this.compressor = new Compressor(buffer, type); + } else { + this.buffer = buffer; + this.compressor = null; + } + } + + public static MemoryRecords emptyRecords(ByteBuffer buffer, CompressionType type, int capacity) { + return new MemoryRecords(buffer, type, true, capacity); + } - public MemoryRecords(int size) { - this(ByteBuffer.allocate(size)); + public static MemoryRecords emptyRecords(ByteBuffer buffer, CompressionType type) { + return emptyRecords(buffer, type, buffer.capacity()); } - public MemoryRecords(ByteBuffer buffer) { - this.buffer = buffer; + public static MemoryRecords iterableRecords(ByteBuffer buffer) { + return new MemoryRecords(buffer, CompressionType.NONE, false, buffer.capacity()); } /** * Append the given record and offset to the buffer */ public void append(long offset, Record record) { - buffer.putLong(offset); - buffer.putInt(record.size()); - buffer.put(record.buffer()); + if (!writable) + throw new IllegalStateException("Memory records is not writable"); + + int size = record.size(); + compressor.putLong(offset); + compressor.putInt(size); + compressor.put(record.buffer()); + compressor.recordWritten(size + Records.LOG_OVERHEAD); record.buffer().rewind(); } /** * Append a new record and offset to the buffer */ - public void append(long offset, byte[] key, byte[] value, CompressionType type) { - buffer.putLong(offset); - buffer.putInt(Record.recordSize(key, value)); - Record.write(this.buffer, key, value, type); + public void append(long offset, byte[] key, byte[] value) { + if (!writable) + throw new IllegalStateException("Memory records is not writable"); + + int size = Record.recordSize(key, value); + compressor.putLong(offset); + compressor.putInt(size); + compressor.putRecord(key, value); + compressor.recordWritten(size + Records.LOG_OVERHEAD); } /** * Check if we have room for a new record containing the given key/value pair + * + * Note that the return value is based on the estimate of the bytes written to the compressor, which may not be + * accurate if compression is really used. When this happens, the following append may cause dynamic buffer + * re-allocation in the underlying byte buffer stream. + * + * Also note that besides the records' capacity, there is also a size limit for the batch. This size limit may be + * smaller than the capacity (e.g. 
when appending a single message whose size is larger than the batch size, the + * capacity will be the message size, but the size limit will still be the batch size), and when the records' size has + * exceed this limit we also mark this record as full. */ public boolean hasRoomFor(byte[] key, byte[] value) { - return this.buffer.remaining() >= Records.LOG_OVERHEAD + Record.recordSize(key, value); + return this.writable && + this.capacity >= this.compressor.estimatedBytesWritten() + Records.LOG_OVERHEAD + Record.recordSize(key, value) && + this.sizeLimit >= this.compressor.estimatedBytesWritten(); + } + + public boolean isFull() { + return !this.writable || + this.capacity <= this.compressor.estimatedBytesWritten() || + this.sizeLimit <= this.compressor.estimatedBytesWritten(); + } + + /** + * Close this batch for no more appends + */ + public void close() { + compressor.close(); + writable = false; + buffer = compressor.buffer(); } /** Write the records in this set to the given channel */ @@ -74,7 +130,24 @@ public int writeTo(GatheringByteChannel channel) throws IOException { * The size of this record set */ public int sizeInBytes() { - return this.buffer.position(); + return compressor.buffer().position(); + } + + /** + * The compression rate of this record set + */ + public double compressionRate() { + if (compressor == null) + return 1.0; + else + return compressor.compressionRate(); + } + + /** + * Return the capacity of the buffer + */ + public int capacity() { + return this.capacity; } /** @@ -86,34 +159,79 @@ public ByteBuffer buffer() { @Override public Iterator iterator() { - return new RecordsIterator(this.buffer); + ByteBuffer copy = (ByteBuffer) this.buffer.duplicate().flip(); + return new RecordsIterator(copy, CompressionType.NONE, false); } - /* TODO: allow reuse of the buffer used for iteration */ public static class RecordsIterator extends AbstractIterator { private final ByteBuffer buffer; - - public RecordsIterator(ByteBuffer buffer) { - ByteBuffer copy = buffer.duplicate(); - copy.flip(); - this.buffer = copy; + private final DataInputStream stream; + private final CompressionType type; + private final boolean shallow; + private RecordsIterator innerIter; + + public RecordsIterator(ByteBuffer buffer, CompressionType type, boolean shallow) { + this.type = type; + this.buffer = buffer; + this.shallow = shallow; + stream = Compressor.wrapForInput(new ByteBufferInputStream(this.buffer), type); } + /* + * Read the next record from the buffer. + * + * Note that in the compressed message set, each message value size is set as the size of the un-compressed + * version of the message value, so when we do de-compression allocating an array of the specified size for + * reading compressed value data is sufficient. 
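 *
 * As a rough sketch (not code from this patch), the shallow/deep iteration strategy implemented below is:
 *
 *   if (innerDone()) {
 *       LogEntry entry = readNextShallowEntry();              // the offset/size/record reads in the try block
 *       CompressionType compression = entry.record().compressionType();
 *       if (compression == CompressionType.NONE || shallow)
 *           return entry;                                     // uncompressed, or shallow mode: hand it out as is
 *       innerIter = new RecordsIterator(entry.record().value(), compression, true);
 *       return innerIter.next();                              // descend into the compressed wrapper's payload
 *   } else {
 *       return innerIter.next();                              // keep draining the current wrapper message
 *   }
 *
 * where readNextShallowEntry() is only shorthand for those reads, not a real method.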
+ */ @Override protected LogEntry makeNext() { - if (buffer.remaining() < Records.LOG_OVERHEAD) - return allDone(); - long offset = buffer.getLong(); - int size = buffer.getInt(); - if (size < 0) - throw new IllegalStateException("Record with size " + size); - if (buffer.remaining() < size) - return allDone(); - ByteBuffer rec = buffer.slice(); - rec.limit(size); - this.buffer.position(this.buffer.position() + size); - return new LogEntry(offset, new Record(rec)); + if (innerDone()) { + try { + // read the offset + long offset = stream.readLong(); + // read record size + int size = stream.readInt(); + if (size < 0) + throw new IllegalStateException("Record with size " + size); + // read the record, if compression is used we cannot depend on size + // and hence has to do extra copy + ByteBuffer rec; + if (type == CompressionType.NONE) { + rec = buffer.slice(); + buffer.position(buffer.position() + size); + rec.limit(size); + } else { + byte[] recordBuffer = new byte[size]; + stream.read(recordBuffer, 0, size); + rec = ByteBuffer.wrap(recordBuffer); + } + LogEntry entry = new LogEntry(offset, new Record(rec)); + entry.record().ensureValid(); + + // decide whether to go shallow or deep iteration if it is compressed + CompressionType compression = entry.record().compressionType(); + if (compression == CompressionType.NONE || shallow) { + return entry; + } else { + // init the inner iterator with the value payload of the message, + // which will de-compress the payload to a set of messages + ByteBuffer value = entry.record().value(); + innerIter = new RecordsIterator(value, compression, true); + return innerIter.next(); + } + } catch (EOFException e) { + return allDone(); + } catch (IOException e) { + throw new KafkaException(e); + } + } else { + return innerIter.next(); + } } - } + private boolean innerDone() { + return (innerIter == null || !innerIter.hasNext()); + } + } } diff --git a/clients/src/main/java/org/apache/kafka/common/record/Record.java b/clients/src/main/java/org/apache/kafka/common/record/Record.java index f1dc9778502cb..10df9fd8d3f4e 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/Record.java +++ b/clients/src/main/java/org/apache/kafka/common/record/Record.java @@ -18,6 +18,7 @@ import java.nio.ByteBuffer; +import org.apache.kafka.common.utils.Crc32; import org.apache.kafka.common.utils.Utils; @@ -40,13 +41,15 @@ public final class Record { public static final int KEY_OFFSET = KEY_SIZE_OFFSET + KEY_SIZE_LENGTH; public static final int VALUE_SIZE_LENGTH = 4; - /** The amount of overhead bytes in a record */ - public static final int RECORD_OVERHEAD = KEY_OFFSET + VALUE_SIZE_LENGTH; + /** + * The size for the record header + */ + public static final int HEADER_SIZE = CRC_LENGTH + MAGIC_LENGTH + ATTRIBUTE_LENGTH; /** - * The minimum valid size for the record header + * The amount of overhead bytes in a record */ - public static final int MIN_HEADER_SIZE = CRC_LENGTH + MAGIC_LENGTH + ATTRIBUTE_LENGTH + KEY_SIZE_LENGTH + VALUE_SIZE_LENGTH; + public static final int RECORD_OVERHEAD = HEADER_SIZE + KEY_SIZE_LENGTH + VALUE_SIZE_LENGTH; /** * The current "magic" value @@ -54,10 +57,10 @@ public final class Record { public static final byte CURRENT_MAGIC_VALUE = 0; /** - * Specifies the mask for the compression code. 2 bits to hold the compression codec. 0 is reserved to indicate no + * Specifies the mask for the compression code. 3 bits to hold the compression codec. 
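 * For example, with the 3-bit mask 0x07 an attributes byte of 0x01 denotes GZIP, 0x02 Snappy and 0x03 LZ4,
 * matching the CompressionType ids introduced elsewhere in this patch.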
0 is reserved to indicate no * compression */ - public static final int COMPRESSION_CODEC_MASK = 0x03; + public static final int COMPRESSION_CODEC_MASK = 0x07; /** * Compression code for uncompressed records @@ -71,27 +74,29 @@ public Record(ByteBuffer buffer) { } /** - * A constructor to create a LogRecord + * A constructor to create a LogRecord. If the record's compression type is not none, then + * its value payload should be already compressed with the specified type; the constructor + * would always write the value payload as is and will not do the compression itself. * * @param key The key of the record (null, if none) * @param value The record value - * @param codec The compression codec used on the contents of the record (if any) + * @param type The compression type used on the contents of the record (if any) * @param valueOffset The offset into the payload array used to extract payload * @param valueSize The size of the payload to use */ - public Record(byte[] key, byte[] value, CompressionType codec, int valueOffset, int valueSize) { - this(ByteBuffer.allocate(recordSize(key == null ? 0 : key.length, value == null ? 0 : valueSize >= 0 ? valueSize - : value.length - valueOffset))); - write(this.buffer, key, value, codec, valueOffset, valueSize); + public Record(byte[] key, byte[] value, CompressionType type, int valueOffset, int valueSize) { + this(ByteBuffer.allocate(recordSize(key == null ? 0 : key.length, + value == null ? 0 : valueSize >= 0 ? valueSize : value.length - valueOffset))); + write(this.buffer, key, value, type, valueOffset, valueSize); this.buffer.rewind(); } - public Record(byte[] key, byte[] value, CompressionType codec) { - this(key, value, codec, 0, -1); + public Record(byte[] key, byte[] value, CompressionType type) { + this(key, value, type, 0, -1); } - public Record(byte[] value, CompressionType codec) { - this(null, value, codec); + public Record(byte[] value, CompressionType type) { + this(null, value, type); } public Record(byte[] key, byte[] value) { @@ -102,40 +107,37 @@ public Record(byte[] value) { this(null, value, CompressionType.NONE); } - public static void write(ByteBuffer buffer, byte[] key, byte[] value, CompressionType codec, int valueOffset, int valueSize) { - // skip crc, we will fill that in at the end - int pos = buffer.position(); - buffer.position(pos + MAGIC_OFFSET); - buffer.put(CURRENT_MAGIC_VALUE); - byte attributes = 0; - if (codec.id > 0) - attributes = (byte) (attributes | (COMPRESSION_CODEC_MASK & codec.id)); - buffer.put(attributes); + // Write a record to the buffer, if the record's compression type is none, then + // its value payload should be already compressed with the specified type + public static void write(ByteBuffer buffer, byte[] key, byte[] value, CompressionType type, int valueOffset, int valueSize) { + // construct the compressor with compression type none since this function will not do any + //compression according to the input type, it will just write the record's payload as is + Compressor compressor = new Compressor(buffer, CompressionType.NONE, buffer.capacity()); + compressor.putRecord(key, value, type, valueOffset, valueSize); + } + + public static void write(Compressor compressor, long crc, byte attributes, byte[] key, byte[] value, int valueOffset, int valueSize) { + // write crc + compressor.putInt((int) (crc & 0xffffffffL)); + // write magic value + compressor.putByte(CURRENT_MAGIC_VALUE); + // write attributes + compressor.putByte(attributes); // write the key if (key == null) { - buffer.putInt(-1); + 
compressor.putInt(-1); } else { - buffer.putInt(key.length); - buffer.put(key, 0, key.length); + compressor.putInt(key.length); + compressor.put(key, 0, key.length); } // write the value if (value == null) { - buffer.putInt(-1); + compressor.putInt(-1); } else { int size = valueSize >= 0 ? valueSize : (value.length - valueOffset); - buffer.putInt(size); - buffer.put(value, valueOffset, size); + compressor.putInt(size); + compressor.put(value, valueOffset, size); } - - // now compute the checksum and fill it in - long crc = computeChecksum(buffer, - buffer.arrayOffset() + pos + MAGIC_OFFSET, - buffer.position() - pos - MAGIC_OFFSET - buffer.arrayOffset()); - Utils.writeUnsignedInt(buffer, pos + CRC_OFFSET, crc); - } - - public static void write(ByteBuffer buffer, byte[] key, byte[] value, CompressionType codec) { - write(buffer, key, value, codec, 0, -1); } public static int recordSize(byte[] key, byte[] value) { @@ -150,13 +152,51 @@ public ByteBuffer buffer() { return this.buffer; } + public static byte computeAttributes(CompressionType type) { + byte attributes = 0; + if (type.id > 0) + attributes = (byte) (attributes | (COMPRESSION_CODEC_MASK & type.id)); + return attributes; + } + /** * Compute the checksum of the record from the record contents */ public static long computeChecksum(ByteBuffer buffer, int position, int size) { - return Utils.crc32(buffer.array(), buffer.arrayOffset() + position, size - buffer.arrayOffset()); + Crc32 crc = new Crc32(); + crc.update(buffer.array(), buffer.arrayOffset() + position, size); + return crc.getValue(); + } + + /** + * Compute the checksum of the record from the attributes, key and value payloads + */ + public static long computeChecksum(byte[] key, byte[] value, CompressionType type, int valueOffset, int valueSize) { + Crc32 crc = new Crc32(); + crc.update(CURRENT_MAGIC_VALUE); + byte attributes = 0; + if (type.id > 0) + attributes = (byte) (attributes | (COMPRESSION_CODEC_MASK & type.id)); + crc.update(attributes); + // update for the key + if (key == null) { + crc.updateInt(-1); + } else { + crc.updateInt(key.length); + crc.update(key, 0, key.length); + } + // update for the value + if (value == null) { + crc.updateInt(-1); + } else { + int size = valueSize >= 0 ? valueSize : (value.length - valueOffset); + crc.updateInt(size); + crc.update(value, valueOffset, size); + } + return crc.getValue(); } + /** * Compute the checksum of the record from the record contents */ @@ -239,7 +279,7 @@ public byte attributes() { } /** - * The compression codec used with this record + * The compression type used with this record */ public CompressionType compressionType() { return CompressionType.forId(buffer.get(ATTRIBUTES_OFFSET) & COMPRESSION_CODEC_MASK); diff --git a/clients/src/main/java/org/apache/kafka/common/requests/AbstractRequestResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/AbstractRequestResponse.java new file mode 100644 index 0000000000000..37aff6c0fd2ec --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/AbstractRequestResponse.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. 
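// --------------------------------------------------------------------------
// Editor's illustration (not part of the patch): a minimal sketch of the
// Record contract documented above -- the constructor writes the value
// payload as-is, so a non-NONE CompressionType only labels a payload the
// caller has already compressed. The package for Record/CompressionType
// (org.apache.kafka.common.record) is assumed here, as is the sample data.
import org.apache.kafka.common.record.CompressionType;
import org.apache.kafka.common.record.Record;

public class RecordSketch {
    public static void main(String[] args) {
        byte[] key = "k1".getBytes();
        byte[] value = "v1".getBytes();

        // Uncompressed record: the payload is stored exactly as given.
        Record plain = new Record(key, value, CompressionType.NONE);
        System.out.println(plain.compressionType());

        // The attributes byte and CRC can also be computed without building a Record,
        // using the static helpers added in this patch.
        byte attributes = Record.computeAttributes(CompressionType.NONE);
        long crc = Record.computeChecksum(key, value, CompressionType.NONE, 0, -1);
        System.out.println(attributes + " / " + crc);
    }
}
// --------------------------------------------------------------------------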
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.protocol.types.Struct; + +import java.nio.ByteBuffer; + +public abstract class AbstractRequestResponse { + protected final Struct struct; + + + public AbstractRequestResponse(Struct struct) { + this.struct = struct; + } + + public Struct toStruct() { + return struct; + } + + /** + * Get the serialized size of this object + */ + public int sizeOf() { + return struct.sizeOf(); + } + + /** + * Write this object to a buffer + */ + public void writeTo(ByteBuffer buffer) { + struct.writeTo(buffer); + } + + @Override + public String toString() { + return struct.toString(); + } + + @Override + public int hashCode() { + return struct.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + AbstractRequestResponse other = (AbstractRequestResponse) obj; + return struct.equals(other.struct); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/ConsumerMetadataRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/ConsumerMetadataRequest.java new file mode 100644 index 0000000000000..99b52c23d639d --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/ConsumerMetadataRequest.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; + +import java.nio.ByteBuffer; + +public class ConsumerMetadataRequest extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentRequestSchema(ApiKeys.CONSUMER_METADATA.id); + private static String GROUP_ID_KEY_NAME = "group_id"; + + private final String groupId; + + public ConsumerMetadataRequest(String groupId) { + super(new Struct(curSchema)); + + struct.set(GROUP_ID_KEY_NAME, groupId); + this.groupId = groupId; + } + + public ConsumerMetadataRequest(Struct struct) { + super(struct); + groupId = struct.getString(GROUP_ID_KEY_NAME); + } + + public String groupId() { + return groupId; + } + + public static ConsumerMetadataRequest parse(ByteBuffer buffer) { + return new ConsumerMetadataRequest(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/ConsumerMetadataResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/ConsumerMetadataResponse.java new file mode 100644 index 0000000000000..8b8f591c4b280 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/ConsumerMetadataResponse.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
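// --------------------------------------------------------------------------
// Editor's illustration (not part of the patch): AbstractRequestResponse gives
// every request/response sizeOf()/writeTo() plus a schema-driven parse(), so a
// serialization round trip through the ConsumerMetadataRequest added above
// might look like this sketch; the group name is made up.
import java.nio.ByteBuffer;
import org.apache.kafka.common.requests.ConsumerMetadataRequest;

public class ConsumerMetadataRequestSketch {
    public static void main(String[] args) {
        ConsumerMetadataRequest request = new ConsumerMetadataRequest("my-group");

        // Serialize using the helpers inherited from AbstractRequestResponse.
        ByteBuffer buffer = ByteBuffer.allocate(request.sizeOf());
        request.writeTo(buffer);
        buffer.rewind();

        // Deserialize with the current-version schema.
        ConsumerMetadataRequest parsed = ConsumerMetadataRequest.parse(buffer);
        System.out.println(parsed.groupId());
    }
}
// --------------------------------------------------------------------------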
+ */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.Node; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; + +import java.nio.ByteBuffer; + +public class ConsumerMetadataResponse extends AbstractRequestResponse { + private static Schema curSchema = ProtoUtils.currentResponseSchema(ApiKeys.CONSUMER_METADATA.id); + private static String ERROR_CODE_KEY_NAME = "error_code"; + private static String COORDINATOR_KEY_NAME = "coordinator"; + + // coordinator level field names + private static String NODE_ID_KEY_NAME = "node_id"; + private static String HOST_KEY_NAME = "host"; + private static String PORT_KEY_NAME = "port"; + + private final short errorCode; + private final Node node; + + public ConsumerMetadataResponse(short errorCode, Node node) { + super(new Struct(curSchema)); + struct.set(ERROR_CODE_KEY_NAME, errorCode); + Struct coordinator = struct.instance(COORDINATOR_KEY_NAME); + coordinator.set(NODE_ID_KEY_NAME, node.id()); + coordinator.set(HOST_KEY_NAME, node.host()); + coordinator.set(PORT_KEY_NAME, node.port()); + struct.set(COORDINATOR_KEY_NAME, coordinator); + this.errorCode = errorCode; + this.node = node; + } + + public ConsumerMetadataResponse(Struct struct) { + super(struct); + errorCode = struct.getShort(ERROR_CODE_KEY_NAME); + Struct broker = (Struct) struct.get(COORDINATOR_KEY_NAME); + int nodeId = broker.getInt(NODE_ID_KEY_NAME); + String host = broker.getString(HOST_KEY_NAME); + int port = broker.getInt(PORT_KEY_NAME); + node = new Node(nodeId, host, port); + } + + public short errorCode() { + return errorCode; + } + + public Node node() { + return node; + } + + public static ConsumerMetadataResponse parse(ByteBuffer buffer) { + return new ConsumerMetadataResponse(((Struct) curSchema.read(buffer))); + } +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/common/requests/FetchRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/FetchRequest.java new file mode 100644 index 0000000000000..2fc471f64f435 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/FetchRequest.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.utils.CollectionUtils; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class FetchRequest extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentRequestSchema(ApiKeys.FETCH.id); + private static String REPLICA_ID_KEY_NAME = "replica_id"; + private static String MAX_WAIT_KEY_NAME = "max_wait_time"; + private static String MIN_BYTES_KEY_NAME = "min_bytes"; + private static String TOPICS_KEY_NAME = "topics"; + + // topic level field names + private static String TOPIC_KEY_NAME = "topic"; + private static String PARTITIONS_KEY_NAME = "partitions"; + + // partition level field names + private static String PARTITION_KEY_NAME = "partition"; + private static String FETCH_OFFSET_KEY_NAME = "fetch_offset"; + private static String MAX_BYTES_KEY_NAME = "max_bytes"; + + private final int replicaId; + private final int maxWait; + private final int minBytes; + private final Map fetchData; + + public static final class PartitionData { + public final long offset; + public final int maxBytes; + + public PartitionData(long offset, int maxBytes) { + this.offset = offset; + this.maxBytes = maxBytes; + } + } + + public FetchRequest(int replicaId, int maxWait, int minBytes, Map fetchData) { + super(new Struct(curSchema)); + Map> topicsData = CollectionUtils.groupDataByTopic(fetchData); + + struct.set(REPLICA_ID_KEY_NAME, replicaId); + struct.set(MAX_WAIT_KEY_NAME, maxWait); + struct.set(MIN_BYTES_KEY_NAME, minBytes); + List topicArray = new ArrayList(); + for (Map.Entry> topicEntry: topicsData.entrySet()) { + Struct topicData = struct.instance(TOPICS_KEY_NAME); + topicData.set(TOPIC_KEY_NAME, topicEntry.getKey()); + List partitionArray = new ArrayList(); + for (Map.Entry partitionEntry : topicEntry.getValue().entrySet()) { + PartitionData fetchPartitionData = partitionEntry.getValue(); + Struct partitionData = topicData.instance(PARTITIONS_KEY_NAME); + partitionData.set(PARTITION_KEY_NAME, partitionEntry.getKey()); + partitionData.set(FETCH_OFFSET_KEY_NAME, fetchPartitionData.offset); + partitionData.set(MAX_BYTES_KEY_NAME, fetchPartitionData.maxBytes); + partitionArray.add(partitionData); + } + topicData.set(PARTITIONS_KEY_NAME, partitionArray.toArray()); + topicArray.add(topicData); + } + struct.set(TOPICS_KEY_NAME, topicArray.toArray()); + this.replicaId = replicaId; + this.maxWait = maxWait; + this.minBytes = minBytes; + this.fetchData = fetchData; + } + + public FetchRequest(Struct struct) { + super(struct); + replicaId = struct.getInt(REPLICA_ID_KEY_NAME); + maxWait = struct.getInt(MAX_WAIT_KEY_NAME); + minBytes = struct.getInt(MIN_BYTES_KEY_NAME); + fetchData = new HashMap(); + for (Object topicResponseObj : struct.getArray(TOPICS_KEY_NAME)) { + Struct topicResponse = (Struct) topicResponseObj; + String topic = topicResponse.getString(TOPIC_KEY_NAME); + for (Object partitionResponseObj : topicResponse.getArray(PARTITIONS_KEY_NAME)) { + Struct partitionResponse = (Struct) partitionResponseObj; + int partition = partitionResponse.getInt(PARTITION_KEY_NAME); + long offset = partitionResponse.getLong(FETCH_OFFSET_KEY_NAME); + int maxBytes = 
partitionResponse.getInt(MAX_BYTES_KEY_NAME); + PartitionData partitionData = new PartitionData(offset, maxBytes); + fetchData.put(new TopicPartition(topic, partition), partitionData); + } + } + } + + public int replicaId() { + return replicaId; + } + + public int maxWait() { + return maxWait; + } + + public int minBytes() { + return minBytes; + } + + public Map fetchData() { + return fetchData; + } + + public static FetchRequest parse(ByteBuffer buffer) { + return new FetchRequest(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/FetchResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/FetchResponse.java new file mode 100644 index 0000000000000..f719010119951 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/FetchResponse.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.utils.CollectionUtils; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class FetchResponse extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentResponseSchema(ApiKeys.FETCH.id); + private static String RESPONSES_KEY_NAME = "responses"; + + // topic level field names + private static String TOPIC_KEY_NAME = "topic"; + private static String PARTITIONS_KEY_NAME = "partition_responses"; + + // partition level field names + private static String PARTITION_KEY_NAME = "partition"; + private static String ERROR_CODE_KEY_NAME = "error_code"; + private static String HIGH_WATERMARK_KEY_NAME = "high_watermark"; + private static String RECORD_SET_KEY_NAME = "record_set"; + + private final Map responseData; + + public static final class PartitionData { + public final short errorCode; + public final long highWatermark; + public final ByteBuffer recordSet; + + public PartitionData(short errorCode, long highWatermark, ByteBuffer recordSet) { + this.errorCode = errorCode; + this.highWatermark = highWatermark; + this.recordSet = recordSet; + } + } + + public FetchResponse(Map responseData) { + super(new Struct(curSchema)); + Map> topicsData = CollectionUtils.groupDataByTopic(responseData); + + List topicArray = new ArrayList(); + for (Map.Entry> topicEntry: topicsData.entrySet()) { + Struct topicData = struct.instance(RESPONSES_KEY_NAME); + topicData.set(TOPIC_KEY_NAME, topicEntry.getKey()); + List 
partitionArray = new ArrayList(); + for (Map.Entry partitionEntry : topicEntry.getValue().entrySet()) { + PartitionData fetchPartitionData = partitionEntry.getValue(); + Struct partitionData = topicData.instance(PARTITIONS_KEY_NAME); + partitionData.set(PARTITION_KEY_NAME, partitionEntry.getKey()); + partitionData.set(ERROR_CODE_KEY_NAME, fetchPartitionData.errorCode); + partitionData.set(HIGH_WATERMARK_KEY_NAME, fetchPartitionData.highWatermark); + partitionData.set(RECORD_SET_KEY_NAME, fetchPartitionData.recordSet); + partitionArray.add(partitionData); + } + topicData.set(PARTITIONS_KEY_NAME, partitionArray.toArray()); + topicArray.add(topicData); + } + struct.set(RESPONSES_KEY_NAME, topicArray.toArray()); + this.responseData = responseData; + } + + public FetchResponse(Struct struct) { + super(struct); + responseData = new HashMap(); + for (Object topicResponseObj : struct.getArray(RESPONSES_KEY_NAME)) { + Struct topicResponse = (Struct) topicResponseObj; + String topic = topicResponse.getString(TOPIC_KEY_NAME); + for (Object partitionResponseObj : topicResponse.getArray(PARTITIONS_KEY_NAME)) { + Struct partitionResponse = (Struct) partitionResponseObj; + int partition = partitionResponse.getInt(PARTITION_KEY_NAME); + short errorCode = partitionResponse.getShort(ERROR_CODE_KEY_NAME); + long highWatermark = partitionResponse.getLong(HIGH_WATERMARK_KEY_NAME); + ByteBuffer recordSet = partitionResponse.getBytes(RECORD_SET_KEY_NAME); + PartitionData partitionData = new PartitionData(errorCode, highWatermark, recordSet); + responseData.put(new TopicPartition(topic, partition), partitionData); + } + } + } + + public Map responseData() { + return responseData; + } + + public static FetchResponse parse(ByteBuffer buffer) { + return new FetchResponse(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/HeartbeatRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/HeartbeatRequest.java new file mode 100644 index 0000000000000..9512db2365d8d --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/HeartbeatRequest.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
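// --------------------------------------------------------------------------
// Editor's illustration (not part of the patch): a sketch of building the
// FetchRequest added above. The replica id of -1 for an ordinary (non-broker)
// client is an assumption, and the topic name, offsets and sizes are made up.
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.requests.FetchRequest;

public class FetchRequestSketch {
    public static void main(String[] args) {
        Map<TopicPartition, FetchRequest.PartitionData> fetchData =
                new HashMap<TopicPartition, FetchRequest.PartitionData>();
        // Fetch up to 64 KB from partition 0, starting at offset 0.
        fetchData.put(new TopicPartition("test-topic", 0),
                      new FetchRequest.PartitionData(0L, 64 * 1024));

        FetchRequest request =
                new FetchRequest(-1 /* replicaId */, 100 /* maxWait */, 1 /* minBytes */, fetchData);

        // The same sizeOf()/writeTo()/parse() round trip applies to this class as well.
        ByteBuffer buffer = ByteBuffer.allocate(request.sizeOf());
        request.writeTo(buffer);
        buffer.rewind();
        System.out.println(FetchRequest.parse(buffer).fetchData().size()); // 1
    }
}
// --------------------------------------------------------------------------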
+ */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; + +import java.nio.ByteBuffer; + +public class HeartbeatRequest extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentRequestSchema(ApiKeys.HEARTBEAT.id); + private static String GROUP_ID_KEY_NAME = "group_id"; + private static String GROUP_GENERATION_ID_KEY_NAME = "group_generation_id"; + private static String CONSUMER_ID_KEY_NAME = "consumer_id"; + + private final String groupId; + private final int groupGenerationId; + private final String consumerId; + + public HeartbeatRequest(String groupId, int groupGenerationId, String consumerId) { + super(new Struct(curSchema)); + struct.set(GROUP_ID_KEY_NAME, groupId); + struct.set(GROUP_GENERATION_ID_KEY_NAME, groupGenerationId); + struct.set(CONSUMER_ID_KEY_NAME, consumerId); + this.groupId = groupId; + this.groupGenerationId = groupGenerationId; + this.consumerId = consumerId; + } + + public HeartbeatRequest(Struct struct) { + super(struct); + groupId = struct.getString(GROUP_ID_KEY_NAME); + groupGenerationId = struct.getInt(GROUP_GENERATION_ID_KEY_NAME); + consumerId = struct.getString(CONSUMER_ID_KEY_NAME); + } + + public String groupId() { + return groupId; + } + + public int groupGenerationId() { + return groupGenerationId; + } + + public String consumerId() { + return consumerId; + } + + public static HeartbeatRequest parse(ByteBuffer buffer) { + return new HeartbeatRequest(((Struct) curSchema.read(buffer))); + } +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/common/requests/HeartbeatResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/HeartbeatResponse.java new file mode 100644 index 0000000000000..8997ffc44c18b --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/HeartbeatResponse.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; + +import java.nio.ByteBuffer; + +public class HeartbeatResponse extends AbstractRequestResponse { + private static Schema curSchema = ProtoUtils.currentResponseSchema(ApiKeys.HEARTBEAT.id); + private static String ERROR_CODE_KEY_NAME = "error_code"; + + private final short errorCode; + public HeartbeatResponse(short errorCode) { + super(new Struct(curSchema)); + struct.set(ERROR_CODE_KEY_NAME, errorCode); + this.errorCode = errorCode; + } + + public HeartbeatResponse(Struct struct) { + super(struct); + errorCode = struct.getShort(ERROR_CODE_KEY_NAME); + } + + public short errorCode() { + return errorCode; + } + + public static HeartbeatResponse parse(ByteBuffer buffer) { + return new HeartbeatResponse(((Struct) curSchema.read(buffer))); + } +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/common/requests/JoinGroupRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/JoinGroupRequest.java new file mode 100644 index 0000000000000..d6e91f3931798 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/JoinGroupRequest.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +public class JoinGroupRequest extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentRequestSchema(ApiKeys.JOIN_GROUP.id); + private static String GROUP_ID_KEY_NAME = "group_id"; + private static String SESSION_TIMEOUT_KEY_NAME = "session_timeout"; + private static String TOPICS_KEY_NAME = "topics"; + private static String CONSUMER_ID_KEY_NAME = "consumer_id"; + private static String STRATEGY_KEY_NAME = "partition_assignment_strategy"; + + private final String groupId; + private final int sessionTimeout; + private final List topics; + private final String consumerId; + private final String strategy; + + public JoinGroupRequest(String groupId, int sessionTimeout, List topics, String consumerId, String strategy) { + super(new Struct(curSchema)); + struct.set(GROUP_ID_KEY_NAME, groupId); + struct.set(SESSION_TIMEOUT_KEY_NAME, sessionTimeout); + struct.set(TOPICS_KEY_NAME, topics.toArray()); + struct.set(CONSUMER_ID_KEY_NAME, consumerId); + struct.set(STRATEGY_KEY_NAME, strategy); + this.groupId = groupId; + this.sessionTimeout = sessionTimeout; + this.topics = topics; + this.consumerId = consumerId; + this.strategy = strategy; + } + + public JoinGroupRequest(Struct struct) { + super(struct); + groupId = struct.getString(GROUP_ID_KEY_NAME); + sessionTimeout = struct.getInt(SESSION_TIMEOUT_KEY_NAME); + Object[] topicsArray = struct.getArray(TOPICS_KEY_NAME); + topics = new ArrayList(); + for (Object topic: topicsArray) + topics.add((String) topic); + consumerId = struct.getString(CONSUMER_ID_KEY_NAME); + strategy = struct.getString(STRATEGY_KEY_NAME); + } + + public String groupId() { + return groupId; + } + + public int sessionTimeout() { + return sessionTimeout; + } + + public List topics() { + return topics; + } + + public String consumerId() { + return consumerId; + } + + public String strategy() { + return strategy; + } + + public static JoinGroupRequest parse(ByteBuffer buffer) { + return new JoinGroupRequest(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/JoinGroupResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/JoinGroupResponse.java new file mode 100644 index 0000000000000..efe89796a7bd0 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/JoinGroupResponse.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
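// --------------------------------------------------------------------------
// Editor's illustration (not part of the patch): a sketch of how the group
// membership requests above fit together. The empty initial consumer id, the
// "range" strategy name, and the idea of feeding a JoinGroupResponse's
// generation/consumer id into HeartbeatRequest are assumed usage, not
// something this patch spells out.
import java.util.Arrays;
import org.apache.kafka.common.requests.HeartbeatRequest;
import org.apache.kafka.common.requests.JoinGroupRequest;
import org.apache.kafka.common.requests.JoinGroupResponse;

public class GroupMembershipSketch {
    public static void main(String[] args) {
        // First join: no consumer id yet ("" mirrors JoinGroupResponse.UNKNOWN_CONSUMER_ID).
        JoinGroupRequest join = new JoinGroupRequest(
                "my-group", 30000, Arrays.asList("test-topic"), "", "range");
        System.out.println(join.groupId() + " -> " + join.topics());
    }

    // Subsequent heartbeats would echo the ids the coordinator handed back
    // in the JoinGroupResponse.
    static HeartbeatRequest heartbeatFor(String groupId, JoinGroupResponse joined) {
        return new HeartbeatRequest(groupId, joined.generationId(), joined.consumerId());
    }
}
// --------------------------------------------------------------------------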
+ */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.utils.CollectionUtils; + +import java.nio.ByteBuffer; +import java.util.*; + +public class JoinGroupResponse extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentResponseSchema(ApiKeys.JOIN_GROUP.id); + private static String ERROR_CODE_KEY_NAME = "error_code"; + private static String GENERATION_ID_KEY_NAME = "group_generation_id"; + private static String CONSUMER_ID_KEY_NAME = "consumer_id"; + private static String ASSIGNED_PARTITIONS_KEY_NAME = "assigned_partitions"; + private static String TOPIC_KEY_NAME = "topic"; + private static String PARTITIONS_KEY_NAME = "partitions"; + + public static int UNKNOWN_GENERATION_ID = -1; + public static String UNKNOWN_CONSUMER_ID = ""; + + private final short errorCode; + private final int generationId; + private final String consumerId; + private final List assignedPartitions; + + public JoinGroupResponse(short errorCode, int generationId, String consumerId, List assignedPartitions) { + super(new Struct(curSchema)); + + Map> partitionsByTopic = CollectionUtils.groupDataByTopic(assignedPartitions); + + struct.set(ERROR_CODE_KEY_NAME, errorCode); + struct.set(GENERATION_ID_KEY_NAME, generationId); + struct.set(CONSUMER_ID_KEY_NAME, consumerId); + List topicArray = new ArrayList(); + for (Map.Entry> entries: partitionsByTopic.entrySet()) { + Struct topicData = struct.instance(ASSIGNED_PARTITIONS_KEY_NAME); + topicData.set(TOPIC_KEY_NAME, entries.getKey()); + topicData.set(PARTITIONS_KEY_NAME, entries.getValue().toArray()); + topicArray.add(topicData); + } + struct.set(ASSIGNED_PARTITIONS_KEY_NAME, topicArray.toArray()); + + this.errorCode = errorCode; + this.generationId = generationId; + this.consumerId = consumerId; + this.assignedPartitions = assignedPartitions; + } + + public JoinGroupResponse(short errorCode) { + this(errorCode, UNKNOWN_GENERATION_ID, UNKNOWN_CONSUMER_ID, Collections.emptyList()); + } + + public JoinGroupResponse(Struct struct) { + super(struct); + assignedPartitions = new ArrayList(); + for (Object topicDataObj : struct.getArray(ASSIGNED_PARTITIONS_KEY_NAME)) { + Struct topicData = (Struct) topicDataObj; + String topic = topicData.getString(TOPIC_KEY_NAME); + for (Object partitionObj : topicData.getArray(PARTITIONS_KEY_NAME)) + assignedPartitions.add(new TopicPartition(topic, (Integer) partitionObj)); + } + errorCode = struct.getShort(ERROR_CODE_KEY_NAME); + generationId = struct.getInt(GENERATION_ID_KEY_NAME); + consumerId = struct.getString(CONSUMER_ID_KEY_NAME); + } + + public short errorCode() { + return errorCode; + } + + public int generationId() { + return generationId; + } + + public String consumerId() { + return consumerId; + } + + public List assignedPartitions() { + return assignedPartitions; + } + + public static JoinGroupResponse parse(ByteBuffer buffer) { + return new JoinGroupResponse(((Struct) curSchema.read(buffer))); + } +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/common/requests/ListOffsetRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/ListOffsetRequest.java new file mode 100644 index 0000000000000..99364c1ca464f --- /dev/null +++ 
b/clients/src/main/java/org/apache/kafka/common/requests/ListOffsetRequest.java @@ -0,0 +1,114 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.utils.CollectionUtils; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class ListOffsetRequest extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentRequestSchema(ApiKeys.LIST_OFFSETS.id); + private static String REPLICA_ID_KEY_NAME = "replica_id"; + private static String TOPICS_KEY_NAME = "topics"; + + // topic level field names + private static String TOPIC_KEY_NAME = "topic"; + private static String PARTITIONS_KEY_NAME = "partitions"; + + // partition level field names + private static String PARTITION_KEY_NAME = "partition"; + private static String TIMESTAMP_KEY_NAME = "timestamp"; + private static String MAX_NUM_OFFSETS_KEY_NAME = "max_num_offsets"; + + private final int replicaId; + private final Map offsetData; + + public static final class PartitionData { + public final long timestamp; + public final int maxNumOffsets; + + public PartitionData(long timestamp, int maxNumOffsets) { + this.timestamp = timestamp; + this.maxNumOffsets = maxNumOffsets; + } + } + + public ListOffsetRequest(int replicaId, Map offsetData) { + super(new Struct(curSchema)); + Map> topicsData = CollectionUtils.groupDataByTopic(offsetData); + + struct.set(REPLICA_ID_KEY_NAME, replicaId); + List topicArray = new ArrayList(); + for (Map.Entry> topicEntry: topicsData.entrySet()) { + Struct topicData = struct.instance(TOPICS_KEY_NAME); + topicData.set(TOPIC_KEY_NAME, topicEntry.getKey()); + List partitionArray = new ArrayList(); + for (Map.Entry partitionEntry : topicEntry.getValue().entrySet()) { + PartitionData offsetPartitionData = partitionEntry.getValue(); + Struct partitionData = topicData.instance(PARTITIONS_KEY_NAME); + partitionData.set(PARTITION_KEY_NAME, partitionEntry.getKey()); + partitionData.set(TIMESTAMP_KEY_NAME, offsetPartitionData.timestamp); + partitionData.set(MAX_NUM_OFFSETS_KEY_NAME, offsetPartitionData.maxNumOffsets); + partitionArray.add(partitionData); + } + topicData.set(PARTITIONS_KEY_NAME, partitionArray.toArray()); + topicArray.add(topicData); + } + struct.set(TOPICS_KEY_NAME, topicArray.toArray()); + this.replicaId = replicaId; + this.offsetData = offsetData; + } + + public ListOffsetRequest(Struct struct) { + super(struct); + 
replicaId = struct.getInt(REPLICA_ID_KEY_NAME); + offsetData = new HashMap(); + for (Object topicResponseObj : struct.getArray(TOPICS_KEY_NAME)) { + Struct topicResponse = (Struct) topicResponseObj; + String topic = topicResponse.getString(TOPIC_KEY_NAME); + for (Object partitionResponseObj : topicResponse.getArray(PARTITIONS_KEY_NAME)) { + Struct partitionResponse = (Struct) partitionResponseObj; + int partition = partitionResponse.getInt(PARTITION_KEY_NAME); + long timestamp = partitionResponse.getLong(TIMESTAMP_KEY_NAME); + int maxNumOffsets = partitionResponse.getInt(MAX_NUM_OFFSETS_KEY_NAME); + PartitionData partitionData = new PartitionData(timestamp, maxNumOffsets); + offsetData.put(new TopicPartition(topic, partition), partitionData); + } + } + } + + public int replicaId() { + return replicaId; + } + + public Map offsetData() { + return offsetData; + } + + public static ListOffsetRequest parse(ByteBuffer buffer) { + return new ListOffsetRequest(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/ListOffsetResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/ListOffsetResponse.java new file mode 100644 index 0000000000000..ac239712f1184 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/ListOffsetResponse.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
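// --------------------------------------------------------------------------
// Editor's illustration (not part of the patch): a sketch of the
// ListOffsetRequest added above. Using -1 as a "latest offset" timestamp is a
// Kafka protocol convention assumed here; it is not defined in this file, and
// the topic name is made up.
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.requests.ListOffsetRequest;

public class ListOffsetRequestSketch {
    public static void main(String[] args) {
        Map<TopicPartition, ListOffsetRequest.PartitionData> offsetData =
                new HashMap<TopicPartition, ListOffsetRequest.PartitionData>();
        // Ask for at most one offset per partition.
        offsetData.put(new TopicPartition("test-topic", 0),
                       new ListOffsetRequest.PartitionData(-1L, 1));

        ListOffsetRequest request = new ListOffsetRequest(-1 /* replicaId */, offsetData);
        System.out.println(request.offsetData().size());
    }
}
// --------------------------------------------------------------------------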
+ */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.utils.CollectionUtils; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class ListOffsetResponse extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentResponseSchema(ApiKeys.LIST_OFFSETS.id); + private static String RESPONSES_KEY_NAME = "responses"; + + // topic level field names + private static String TOPIC_KEY_NAME = "topic"; + private static String PARTITIONS_KEY_NAME = "partition_responses"; + + // partition level field names + private static String PARTITION_KEY_NAME = "partition"; + private static String ERROR_CODE_KEY_NAME = "error_code"; + private static String OFFSETS_KEY_NAME = "offsets"; + + private final Map responseData; + + public static final class PartitionData { + public final short errorCode; + public final List offsets; + + public PartitionData(short errorCode, List offsets) { + this.errorCode = errorCode; + this.offsets = offsets; + } + } + + public ListOffsetResponse(Map responseData) { + super(new Struct(curSchema)); + Map> topicsData = CollectionUtils.groupDataByTopic(responseData); + + List topicArray = new ArrayList(); + for (Map.Entry> topicEntry: topicsData.entrySet()) { + Struct topicData = struct.instance(RESPONSES_KEY_NAME); + topicData.set(TOPIC_KEY_NAME, topicEntry.getKey()); + List partitionArray = new ArrayList(); + for (Map.Entry partitionEntry : topicEntry.getValue().entrySet()) { + PartitionData offsetPartitionData = partitionEntry.getValue(); + Struct partitionData = topicData.instance(PARTITIONS_KEY_NAME); + partitionData.set(PARTITION_KEY_NAME, partitionEntry.getKey()); + partitionData.set(ERROR_CODE_KEY_NAME, offsetPartitionData.errorCode); + partitionData.set(OFFSETS_KEY_NAME, offsetPartitionData.offsets.toArray()); + partitionArray.add(partitionData); + } + topicData.set(PARTITIONS_KEY_NAME, partitionArray.toArray()); + topicArray.add(topicData); + } + struct.set(RESPONSES_KEY_NAME, topicArray.toArray()); + this.responseData = responseData; + } + + public ListOffsetResponse(Struct struct) { + super(struct); + responseData = new HashMap(); + for (Object topicResponseObj : struct.getArray(RESPONSES_KEY_NAME)) { + Struct topicResponse = (Struct) topicResponseObj; + String topic = topicResponse.getString(TOPIC_KEY_NAME); + for (Object partitionResponseObj : topicResponse.getArray(PARTITIONS_KEY_NAME)) { + Struct partitionResponse = (Struct) partitionResponseObj; + int partition = partitionResponse.getInt(PARTITION_KEY_NAME); + short errorCode = partitionResponse.getShort(ERROR_CODE_KEY_NAME); + Object[] offsets = partitionResponse.getArray(OFFSETS_KEY_NAME); + List offsetsList = new ArrayList(); + for (Object offset: offsets) + offsetsList.add((Long) offset); + PartitionData partitionData = new PartitionData(errorCode, offsetsList); + responseData.put(new TopicPartition(topic, partition), partitionData); + } + } + } + + public Map responseData() { + return responseData; + } + + public static ListOffsetResponse parse(ByteBuffer buffer) { + return new ListOffsetResponse(((Struct) curSchema.read(buffer))); + } +} diff --git 
a/clients/src/main/java/org/apache/kafka/common/requests/MetadataRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/MetadataRequest.java new file mode 100644 index 0000000000000..b22ca1dce65f6 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/MetadataRequest.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.common.requests; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; + +public class MetadataRequest extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentRequestSchema(ApiKeys.METADATA.id); + private static String TOPICS_KEY_NAME = "topics"; + + private final List topics; + + public MetadataRequest(List topics) { + super(new Struct(curSchema)); + struct.set(TOPICS_KEY_NAME, topics.toArray()); + this.topics = topics; + } + + public MetadataRequest(Struct struct) { + super(struct); + Object[] topicArray = struct.getArray(TOPICS_KEY_NAME); + topics = new ArrayList(); + for (Object topicObj: topicArray) { + topics.add((String) topicObj); + } + } + + public List topics() { + return topics; + } + + public static MetadataRequest parse(ByteBuffer buffer) { + return new MetadataRequest(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/MetadataResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/MetadataResponse.java new file mode 100644 index 0000000000000..d97962d384017 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/MetadataResponse.java @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ +package org.apache.kafka.common.requests; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.kafka.common.Cluster; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.PartitionInfo; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.Errors; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; + +public class MetadataResponse extends AbstractRequestResponse { + private static Schema curSchema = ProtoUtils.currentResponseSchema(ApiKeys.METADATA.id); + private static String BROKERS_KEY_NAME = "brokers"; + private static String TOPIC_METATDATA_KEY_NAME = "topic_metadata"; + + // broker level field names + private static String NODE_ID_KEY_NAME = "node_id"; + private static String HOST_KEY_NAME = "host"; + private static String PORT_KEY_NAME = "port"; + + // topic level field names + private static String TOPIC_ERROR_CODE_KEY_NAME = "topic_error_code"; + private static String TOPIC_KEY_NAME = "topic"; + private static String PARTITION_METADATA_KEY_NAME = "partition_metadata"; + + // partition level field names + private static String PARTITION_ERROR_CODE_KEY_NAME = "partition_error_code"; + private static String PARTITION_KEY_NAME = "partition_id"; + private static String LEADER_KEY_NAME = "leader"; + private static String REPLICAS_KEY_NAME = "replicas"; + private static String ISR_KEY_NAME = "isr"; + + private final Cluster cluster; + private final Map errors; + + public MetadataResponse(Cluster cluster) { + super(new Struct(curSchema)); + + List brokerArray = new ArrayList(); + for (Node node: cluster.nodes()) { + Struct broker = struct.instance(BROKERS_KEY_NAME); + broker.set(NODE_ID_KEY_NAME, node.id()); + broker.set(HOST_KEY_NAME, node.host()); + broker.set(PORT_KEY_NAME, node.port()); + brokerArray.add(broker); + } + struct.set(BROKERS_KEY_NAME, brokerArray.toArray()); + + List topicArray = new ArrayList(); + for (String topic: cluster.topics()) { + Struct topicData = struct.instance(TOPIC_METATDATA_KEY_NAME); + topicData.set(TOPIC_ERROR_CODE_KEY_NAME, (short)0); // no error + topicData.set(TOPIC_KEY_NAME, topic); + List partitionArray = new ArrayList(); + for (PartitionInfo fetchPartitionData : cluster.partitionsForTopic(topic)) { + Struct partitionData = topicData.instance(PARTITION_METADATA_KEY_NAME); + partitionData.set(PARTITION_ERROR_CODE_KEY_NAME, (short)0); // no error + partitionData.set(PARTITION_KEY_NAME, fetchPartitionData.partition()); + partitionData.set(LEADER_KEY_NAME, fetchPartitionData.leader().id()); + ArrayList replicas = new ArrayList(); + for (Node node: fetchPartitionData.replicas()) + replicas.add(node.id()); + partitionData.set(REPLICAS_KEY_NAME, replicas.toArray()); + ArrayList isr = new ArrayList(); + for (Node node: fetchPartitionData.inSyncReplicas()) + isr.add(node.id()); + partitionData.set(ISR_KEY_NAME, isr.toArray()); + partitionArray.add(partitionData); + } + topicData.set(PARTITION_METADATA_KEY_NAME, partitionArray.toArray()); + topicArray.add(topicData); + } + struct.set(TOPIC_METATDATA_KEY_NAME, topicArray.toArray()); + + this.cluster = cluster; + this.errors = new HashMap(); + } + + public MetadataResponse(Struct struct) { + super(struct); + Map errors = new HashMap(); + Map brokers = new HashMap(); + Object[] brokerStructs = (Object[]) struct.get(BROKERS_KEY_NAME); + for (int i 
= 0; i < brokerStructs.length; i++) { + Struct broker = (Struct) brokerStructs[i]; + int nodeId = broker.getInt(NODE_ID_KEY_NAME); + String host = broker.getString(HOST_KEY_NAME); + int port = broker.getInt(PORT_KEY_NAME); + brokers.put(nodeId, new Node(nodeId, host, port)); + } + List partitions = new ArrayList(); + Object[] topicInfos = (Object[]) struct.get(TOPIC_METATDATA_KEY_NAME); + for (int i = 0; i < topicInfos.length; i++) { + Struct topicInfo = (Struct) topicInfos[i]; + short topicError = topicInfo.getShort(TOPIC_ERROR_CODE_KEY_NAME); + String topic = topicInfo.getString(TOPIC_KEY_NAME); + if (topicError == Errors.NONE.code()) { + Object[] partitionInfos = (Object[]) topicInfo.get(PARTITION_METADATA_KEY_NAME); + for (int j = 0; j < partitionInfos.length; j++) { + Struct partitionInfo = (Struct) partitionInfos[j]; + int partition = partitionInfo.getInt(PARTITION_KEY_NAME); + int leader = partitionInfo.getInt(LEADER_KEY_NAME); + Node leaderNode = leader == -1 ? null : brokers.get(leader); + Object[] replicas = (Object[]) partitionInfo.get(REPLICAS_KEY_NAME); + Node[] replicaNodes = new Node[replicas.length]; + for (int k = 0; k < replicas.length; k++) + replicaNodes[k] = brokers.get(replicas[k]); + Object[] isr = (Object[]) partitionInfo.get(ISR_KEY_NAME); + Node[] isrNodes = new Node[isr.length]; + for (int k = 0; k < isr.length; k++) + isrNodes[k] = brokers.get(isr[k]); + partitions.add(new PartitionInfo(topic, partition, leaderNode, replicaNodes, isrNodes)); + } + } else { + errors.put(topic, Errors.forCode(topicError)); + } + } + this.errors = errors; + this.cluster = new Cluster(brokers.values(), partitions); + } + + public Map errors() { + return this.errors; + } + + public Cluster cluster() { + return this.cluster; + } + + public static MetadataResponse parse(ByteBuffer buffer) { + return new MetadataResponse(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/OffsetCommitRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/OffsetCommitRequest.java new file mode 100644 index 0000000000000..3ee5cbad55ce8 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/OffsetCommitRequest.java @@ -0,0 +1,180 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.utils.CollectionUtils; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * This wrapper supports both v0 and v1 of OffsetCommitRequest. + */ +public class OffsetCommitRequest extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentRequestSchema(ApiKeys.OFFSET_COMMIT.id); + private static String GROUP_ID_KEY_NAME = "group_id"; + private static String GENERATION_ID_KEY_NAME = "group_generation_id"; + private static String CONSUMER_ID_KEY_NAME = "consumer_id"; + private static String TOPICS_KEY_NAME = "topics"; + + // topic level field names + private static String TOPIC_KEY_NAME = "topic"; + private static String PARTITIONS_KEY_NAME = "partitions"; + + // partition level field names + private static String PARTITION_KEY_NAME = "partition"; + private static String COMMIT_OFFSET_KEY_NAME = "offset"; + private static String TIMESTAMP_KEY_NAME = "timestamp"; + private static String METADATA_KEY_NAME = "metadata"; + + public static final int DEFAULT_GENERATION_ID = -1; + public static final String DEFAULT_CONSUMER_ID = ""; + + private final String groupId; + private final int generationId; + private final String consumerId; + private final Map offsetData; + + public static final class PartitionData { + public final long offset; + public final long timestamp; + public final String metadata; + + public PartitionData(long offset, long timestamp, String metadata) { + this.offset = offset; + this.timestamp = timestamp; + this.metadata = metadata; + } + } + + /** + * Constructor for version 0. + * @param groupId + * @param offsetData + */ + @Deprecated + public OffsetCommitRequest(String groupId, Map offsetData) { + super(new Struct(ProtoUtils.requestSchema(ApiKeys.OFFSET_COMMIT.id, 0))); + initCommonFields(groupId, offsetData); + this.groupId = groupId; + this.generationId = DEFAULT_GENERATION_ID; + this.consumerId = DEFAULT_CONSUMER_ID; + this.offsetData = offsetData; + } + + /** + * Constructor for version 1. 
+ * @param groupId + * @param generationId + * @param consumerId + * @param offsetData + */ + public OffsetCommitRequest(String groupId, int generationId, String consumerId, Map offsetData) { + super(new Struct(curSchema)); + + initCommonFields(groupId, offsetData); + struct.set(GENERATION_ID_KEY_NAME, generationId); + struct.set(CONSUMER_ID_KEY_NAME, consumerId); + this.groupId = groupId; + this.generationId = generationId; + this.consumerId = consumerId; + this.offsetData = offsetData; + } + + private void initCommonFields(String groupId, Map offsetData) { + Map> topicsData = CollectionUtils.groupDataByTopic(offsetData); + + struct.set(GROUP_ID_KEY_NAME, groupId); + List topicArray = new ArrayList(); + for (Map.Entry> topicEntry: topicsData.entrySet()) { + Struct topicData = struct.instance(TOPICS_KEY_NAME); + topicData.set(TOPIC_KEY_NAME, topicEntry.getKey()); + List partitionArray = new ArrayList(); + for (Map.Entry partitionEntry : topicEntry.getValue().entrySet()) { + PartitionData fetchPartitionData = partitionEntry.getValue(); + Struct partitionData = topicData.instance(PARTITIONS_KEY_NAME); + partitionData.set(PARTITION_KEY_NAME, partitionEntry.getKey()); + partitionData.set(COMMIT_OFFSET_KEY_NAME, fetchPartitionData.offset); + partitionData.set(TIMESTAMP_KEY_NAME, fetchPartitionData.timestamp); + partitionData.set(METADATA_KEY_NAME, fetchPartitionData.metadata); + partitionArray.add(partitionData); + } + topicData.set(PARTITIONS_KEY_NAME, partitionArray.toArray()); + topicArray.add(topicData); + } + struct.set(TOPICS_KEY_NAME, topicArray.toArray()); + } + + public OffsetCommitRequest(Struct struct) { + super(struct); + offsetData = new HashMap(); + for (Object topicResponseObj : struct.getArray(TOPICS_KEY_NAME)) { + Struct topicResponse = (Struct) topicResponseObj; + String topic = topicResponse.getString(TOPIC_KEY_NAME); + for (Object partitionResponseObj : topicResponse.getArray(PARTITIONS_KEY_NAME)) { + Struct partitionResponse = (Struct) partitionResponseObj; + int partition = partitionResponse.getInt(PARTITION_KEY_NAME); + long offset = partitionResponse.getLong(COMMIT_OFFSET_KEY_NAME); + long timestamp = partitionResponse.getLong(TIMESTAMP_KEY_NAME); + String metadata = partitionResponse.getString(METADATA_KEY_NAME); + PartitionData partitionData = new PartitionData(offset, timestamp, metadata); + offsetData.put(new TopicPartition(topic, partition), partitionData); + } + } + groupId = struct.getString(GROUP_ID_KEY_NAME); + // This field only exists in v1. + if (struct.hasField(GENERATION_ID_KEY_NAME)) + generationId = struct.getInt(GENERATION_ID_KEY_NAME); + else + generationId = DEFAULT_GENERATION_ID; + + // This field only exists in v1. 
+ if (struct.hasField(CONSUMER_ID_KEY_NAME)) + consumerId = struct.getString(CONSUMER_ID_KEY_NAME); + else + consumerId = DEFAULT_CONSUMER_ID; + } + + public String groupId() { + return groupId; + } + + public int generationId() { + return generationId; + } + + public String consumerId() { + return consumerId; + } + + public Map offsetData() { + return offsetData; + } + + public static OffsetCommitRequest parse(ByteBuffer buffer, int versionId) { + Schema schema = ProtoUtils.requestSchema(ApiKeys.OFFSET_COMMIT.id, versionId); + return new OffsetCommitRequest(((Struct) schema.read(buffer))); + } + + public static OffsetCommitRequest parse(ByteBuffer buffer) { + return new OffsetCommitRequest(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/OffsetCommitResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/OffsetCommitResponse.java new file mode 100644 index 0000000000000..711232ac61378 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/OffsetCommitResponse.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.utils.CollectionUtils; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class OffsetCommitResponse extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentResponseSchema(ApiKeys.OFFSET_COMMIT.id); + private static String RESPONSES_KEY_NAME = "responses"; + + // topic level fields + private static String TOPIC_KEY_NAME = "topic"; + private static String PARTITIONS_KEY_NAME = "partition_responses"; + + // partition level fields + private static String PARTITION_KEY_NAME = "partition"; + private static String ERROR_CODE_KEY_NAME = "error_code"; + + private final Map responseData; + + public OffsetCommitResponse(Map responseData) { + super(new Struct(curSchema)); + + Map> topicsData = CollectionUtils.groupDataByTopic(responseData); + + List topicArray = new ArrayList(); + for (Map.Entry> entries: topicsData.entrySet()) { + Struct topicData = struct.instance(RESPONSES_KEY_NAME); + topicData.set(TOPIC_KEY_NAME, entries.getKey()); + List partitionArray = new ArrayList(); + for (Map.Entry partitionEntry : entries.getValue().entrySet()) { + Struct partitionData = topicData.instance(PARTITIONS_KEY_NAME); + partitionData.set(PARTITION_KEY_NAME, partitionEntry.getKey()); + partitionData.set(ERROR_CODE_KEY_NAME, partitionEntry.getValue()); + partitionArray.add(partitionData); + } + topicData.set(PARTITIONS_KEY_NAME, partitionArray.toArray()); + topicArray.add(topicData); + } + struct.set(RESPONSES_KEY_NAME, topicArray.toArray()); + this.responseData = responseData; + } + + public OffsetCommitResponse(Struct struct) { + super(struct); + responseData = new HashMap(); + for (Object topicResponseObj : struct.getArray(RESPONSES_KEY_NAME)) { + Struct topicResponse = (Struct) topicResponseObj; + String topic = topicResponse.getString(TOPIC_KEY_NAME); + for (Object partitionResponseObj : topicResponse.getArray(PARTITIONS_KEY_NAME)) { + Struct partitionResponse = (Struct) partitionResponseObj; + int partition = partitionResponse.getInt(PARTITION_KEY_NAME); + short errorCode = partitionResponse.getShort(ERROR_CODE_KEY_NAME); + responseData.put(new TopicPartition(topic, partition), errorCode); + } + } + } + + public Map responseData() { + return responseData; + } + + public static OffsetCommitResponse parse(ByteBuffer buffer) { + return new OffsetCommitResponse(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/OffsetFetchRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/OffsetFetchRequest.java new file mode 100644 index 0000000000000..90d5135b97a44 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/OffsetFetchRequest.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. 
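The two offset-commit classes above pair up on the wire: the request carries one (offset, timestamp, metadata) entry per partition, the response one error code per partition. The following is a minimal usage sketch, not part of this patch; the group, consumer id, topic and offset values are placeholders, the generic type parameters are reconstructed from the field accesses in the constructors (they do not survive in this rendering), and it assumes the toStruct() accessor that NetworkClientTest later in this patch uses on ProduceRequest is available on all AbstractRequestResponse subclasses.

    import java.nio.ByteBuffer;
    import java.util.Collections;
    import java.util.Map;
    import org.apache.kafka.common.TopicPartition;
    import org.apache.kafka.common.requests.OffsetCommitRequest;
    import org.apache.kafka.common.requests.OffsetCommitResponse;

    public class OffsetCommitSketch {
        public static void main(String[] args) {
            // One committed offset: partition 0 of "my-topic" at offset 42 with empty metadata.
            Map<TopicPartition, OffsetCommitRequest.PartitionData> offsets =
                Collections.singletonMap(new TopicPartition("my-topic", 0),
                    new OffsetCommitRequest.PartitionData(42L, System.currentTimeMillis(), ""));
            // v1 constructor: group generation and consumer id travel with the commit.
            OffsetCommitRequest request = new OffsetCommitRequest("my-group", 1, "consumer-1", offsets);

            // Round-trip through the wire format, roughly as a broker-side handler would see it.
            ByteBuffer buffer = ByteBuffer.allocate(request.toStruct().sizeOf());
            request.toStruct().writeTo(buffer);
            buffer.flip();
            OffsetCommitRequest parsed = OffsetCommitRequest.parse(buffer);
            System.out.println(parsed.groupId() + " generation " + parsed.generationId());

            // The matching response: error code 0 (no error) for the committed partition.
            OffsetCommitResponse response = new OffsetCommitResponse(
                Collections.singletonMap(new TopicPartition("my-topic", 0), (short) 0));
            System.out.println(response.responseData());
        }
    }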
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.utils.CollectionUtils; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * This wrapper supports both v0 and v1 of OffsetFetchRequest. + */ +public class OffsetFetchRequest extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentRequestSchema(ApiKeys.OFFSET_FETCH.id); + private static String GROUP_ID_KEY_NAME = "group_id"; + private static String TOPICS_KEY_NAME = "topics"; + + // topic level field names + private static String TOPIC_KEY_NAME = "topic"; + private static String PARTITIONS_KEY_NAME = "partitions"; + + // partition level field names + private static String PARTITION_KEY_NAME = "partition"; + + public static final int DEFAULT_GENERATION_ID = -1; + public static final String DEFAULT_CONSUMER_ID = ""; + + private final String groupId; + private final List partitions; + + public OffsetFetchRequest(String groupId, List partitions) { + super(new Struct(curSchema)); + + Map> topicsData = CollectionUtils.groupDataByTopic(partitions); + + struct.set(GROUP_ID_KEY_NAME, groupId); + List topicArray = new ArrayList(); + for (Map.Entry> entries: topicsData.entrySet()) { + Struct topicData = struct.instance(TOPICS_KEY_NAME); + topicData.set(TOPIC_KEY_NAME, entries.getKey()); + List partitionArray = new ArrayList(); + for (Integer partiitonId : entries.getValue()) { + Struct partitionData = topicData.instance(PARTITIONS_KEY_NAME); + partitionData.set(PARTITION_KEY_NAME, partiitonId); + partitionArray.add(partitionData); + } + topicData.set(PARTITIONS_KEY_NAME, partitionArray.toArray()); + topicArray.add(topicData); + } + struct.set(TOPICS_KEY_NAME, topicArray.toArray()); + this.groupId = groupId; + this.partitions = partitions; + } + + public OffsetFetchRequest(Struct struct) { + super(struct); + partitions = new ArrayList(); + for (Object topicResponseObj : struct.getArray(TOPICS_KEY_NAME)) { + Struct topicResponse = (Struct) topicResponseObj; + String topic = topicResponse.getString(TOPIC_KEY_NAME); + for (Object partitionResponseObj : topicResponse.getArray(PARTITIONS_KEY_NAME)) { + Struct partitionResponse = (Struct) partitionResponseObj; + int partition = partitionResponse.getInt(PARTITION_KEY_NAME); + partitions.add(new TopicPartition(topic, partition)); + } + } + groupId = struct.getString(GROUP_ID_KEY_NAME); + } + + public String groupId() { + return groupId; + } + + public List partitions() { + return partitions; + } + + public static OffsetFetchRequest parse(ByteBuffer buffer) { + return new OffsetFetchRequest(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/OffsetFetchResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/OffsetFetchResponse.java new file mode 100644 
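OffsetFetchRequest, completed above, is the read side of the same bookkeeping: it only names the partitions whose committed offsets the group wants back. A short illustrative sketch, not in the patch, with placeholder topic and group names:

    import java.util.Arrays;
    import java.util.List;
    import org.apache.kafka.common.TopicPartition;
    import org.apache.kafka.common.requests.OffsetFetchRequest;

    public class OffsetFetchSketch {
        public static void main(String[] args) {
            List<TopicPartition> partitions = Arrays.asList(
                new TopicPartition("my-topic", 0), new TopicPartition("my-topic", 1));
            OffsetFetchRequest fetch = new OffsetFetchRequest("my-group", partitions);
            // The wire format groups partitions by topic; partitions() returns the flat list passed in.
            System.out.println(fetch.groupId() + " -> " + fetch.partitions());
        }
    }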
index 0000000000000..6b7c269ad7679 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/OffsetFetchResponse.java @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.utils.CollectionUtils; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class OffsetFetchResponse extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentResponseSchema(ApiKeys.OFFSET_FETCH.id); + private static String RESPONSES_KEY_NAME = "responses"; + + // topic level fields + private static String TOPIC_KEY_NAME = "topic"; + private static String PARTITIONS_KEY_NAME = "partition_responses"; + + // partition level fields + private static String PARTITION_KEY_NAME = "partition"; + private static String COMMIT_OFFSET_KEY_NAME = "offset"; + private static String METADATA_KEY_NAME = "metadata"; + private static String ERROR_CODE_KEY_NAME = "error_code"; + + private final Map responseData; + + public static final class PartitionData { + public final long offset; + public final String metadata; + public final short errorCode; + + public PartitionData(long offset, String metadata, short errorCode) { + this.offset = offset; + this.metadata = metadata; + this.errorCode = errorCode; + } + } + + public OffsetFetchResponse(Map responseData) { + super(new Struct(curSchema)); + + Map> topicsData = CollectionUtils.groupDataByTopic(responseData); + + List topicArray = new ArrayList(); + for (Map.Entry> entries: topicsData.entrySet()) { + Struct topicData = struct.instance(RESPONSES_KEY_NAME); + topicData.set(TOPIC_KEY_NAME, entries.getKey()); + List partitionArray = new ArrayList(); + for (Map.Entry partitionEntry : entries.getValue().entrySet()) { + PartitionData fetchPartitionData = partitionEntry.getValue(); + Struct partitionData = topicData.instance(PARTITIONS_KEY_NAME); + partitionData.set(PARTITION_KEY_NAME, partitionEntry.getKey()); + partitionData.set(COMMIT_OFFSET_KEY_NAME, fetchPartitionData.offset); + partitionData.set(METADATA_KEY_NAME, fetchPartitionData.metadata); + partitionData.set(ERROR_CODE_KEY_NAME, fetchPartitionData.errorCode); + partitionArray.add(partitionData); + } + topicData.set(PARTITIONS_KEY_NAME, partitionArray.toArray()); + topicArray.add(topicData); + } + struct.set(RESPONSES_KEY_NAME, topicArray.toArray()); + this.responseData = responseData; + } + + public OffsetFetchResponse(Struct 
struct) { + super(struct); + responseData = new HashMap(); + for (Object topicResponseObj : struct.getArray(RESPONSES_KEY_NAME)) { + Struct topicResponse = (Struct) topicResponseObj; + String topic = topicResponse.getString(TOPIC_KEY_NAME); + for (Object partitionResponseObj : topicResponse.getArray(PARTITIONS_KEY_NAME)) { + Struct partitionResponse = (Struct) partitionResponseObj; + int partition = partitionResponse.getInt(PARTITION_KEY_NAME); + long offset = partitionResponse.getLong(COMMIT_OFFSET_KEY_NAME); + String metadata = partitionResponse.getString(METADATA_KEY_NAME); + short errorCode = partitionResponse.getShort(ERROR_CODE_KEY_NAME); + PartitionData partitionData = new PartitionData(offset, metadata, errorCode); + responseData.put(new TopicPartition(topic, partition), partitionData); + } + } + } + + public Map responseData() { + return responseData; + } + + public static OffsetFetchResponse parse(ByteBuffer buffer) { + return new OffsetFetchResponse(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/ProduceRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/ProduceRequest.java new file mode 100644 index 0000000000000..3dbba8a360f11 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/requests/ProduceRequest.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
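In the OffsetFetchResponse just completed, each fetched partition comes back as a PartitionData triple of offset, metadata and error code. A small sketch of building and reading one follows; it is not part of the patch, the values are placeholders, and the generics are reconstructed as before.

    import java.util.Collections;
    import java.util.Map;
    import org.apache.kafka.common.TopicPartition;
    import org.apache.kafka.common.requests.OffsetFetchResponse;

    public class OffsetFetchResponseSketch {
        public static void main(String[] args) {
            Map<TopicPartition, OffsetFetchResponse.PartitionData> data = Collections.singletonMap(
                new TopicPartition("my-topic", 0),
                // offset 42, empty metadata, error code 0
                new OffsetFetchResponse.PartitionData(42L, "", (short) 0));
            OffsetFetchResponse response = new OffsetFetchResponse(data);
            OffsetFetchResponse.PartitionData pd =
                response.responseData().get(new TopicPartition("my-topic", 0));
            System.out.println(pd.offset + " / error " + pd.errorCode);
        }
    }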
+ */ + +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.utils.CollectionUtils; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class ProduceRequest extends AbstractRequestResponse { + public static Schema curSchema = ProtoUtils.currentRequestSchema(ApiKeys.PRODUCE.id); + private static String ACKS_KEY_NAME = "acks"; + private static String TIMEOUT_KEY_NAME = "timeout"; + private static String TOPIC_DATA_KEY_NAME = "topic_data"; + + // topic level field names + private static String TOPIC_KEY_NAME = "topic"; + private static String PARTITION_DATA_KEY_NAME = "data"; + + // partition level field names + private static String PARTITION_KEY_NAME = "partition"; + private static String RECORD_SET_KEY_NAME = "record_set"; + + private final short acks; + private final int timeout; + private final Map partitionRecords; + + public ProduceRequest(short acks, int timeout, Map partitionRecords) { + super(new Struct(curSchema)); + Map> recordsByTopic = CollectionUtils.groupDataByTopic(partitionRecords); + struct.set(ACKS_KEY_NAME, acks); + struct.set(TIMEOUT_KEY_NAME, timeout); + List topicDatas = new ArrayList(recordsByTopic.size()); + for (Map.Entry> entry : recordsByTopic.entrySet()) { + Struct topicData = struct.instance(TOPIC_DATA_KEY_NAME); + topicData.set(TOPIC_KEY_NAME, entry.getKey()); + List partitionArray = new ArrayList(); + for (Map.Entry partitionEntry : entry.getValue().entrySet()) { + ByteBuffer buffer = partitionEntry.getValue().duplicate(); + Struct part = topicData.instance(PARTITION_DATA_KEY_NAME) + .set(PARTITION_KEY_NAME, partitionEntry.getKey()) + .set(RECORD_SET_KEY_NAME, buffer); + partitionArray.add(part); + } + topicData.set(PARTITION_DATA_KEY_NAME, partitionArray.toArray()); + topicDatas.add(topicData); + } + struct.set(TOPIC_DATA_KEY_NAME, topicDatas.toArray()); + this.acks = acks; + this.timeout = timeout; + this.partitionRecords = partitionRecords; + } + + public ProduceRequest(Struct struct) { + super(struct); + partitionRecords = new HashMap(); + for (Object topicDataObj : struct.getArray(TOPIC_DATA_KEY_NAME)) { + Struct topicData = (Struct) topicDataObj; + String topic = topicData.getString(TOPIC_KEY_NAME); + for (Object partitionResponseObj : topicData.getArray(PARTITION_DATA_KEY_NAME)) { + Struct partitionResponse = (Struct) partitionResponseObj; + int partition = partitionResponse.getInt(PARTITION_KEY_NAME); + ByteBuffer records = partitionResponse.getBytes(RECORD_SET_KEY_NAME); + partitionRecords.put(new TopicPartition(topic, partition), records); + } + } + acks = struct.getShort(ACKS_KEY_NAME); + timeout = struct.getInt(TIMEOUT_KEY_NAME); + } + + public short acks() { + return acks; + } + + public int timeout() { + return timeout; + } + + public Map partitionRecords() { + return partitionRecords; + } + + public static ProduceRequest parse(ByteBuffer buffer) { + return new ProduceRequest(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/ProduceResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/ProduceResponse.java new file mode 100644 index 0000000000000..5220464913e6e --- /dev/null +++ 
b/clients/src/main/java/org/apache/kafka/common/requests/ProduceResponse.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Schema; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.utils.CollectionUtils; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class ProduceResponse extends AbstractRequestResponse { + private static Schema curSchema = ProtoUtils.currentResponseSchema(ApiKeys.PRODUCE.id); + private static String RESPONSES_KEY_NAME = "responses"; + + // topic level field names + private static String TOPIC_KEY_NAME = "topic"; + private static String PARTITION_RESPONSES_KEY_NAME = "partition_responses"; + + // partition level field names + private static String PARTITION_KEY_NAME = "partition"; + private static String ERROR_CODE_KEY_NAME = "error_code"; + private static String BASE_OFFSET_KEY_NAME = "base_offset"; + + private final Map responses; + + public ProduceResponse(Map responses) { + super(new Struct(curSchema)); + Map> responseByTopic = CollectionUtils.groupDataByTopic(responses); + List topicDatas = new ArrayList(responseByTopic.size()); + for (Map.Entry> entry : responseByTopic.entrySet()) { + Struct topicData = struct.instance(RESPONSES_KEY_NAME); + topicData.set(TOPIC_KEY_NAME, entry.getKey()); + List partitionArray = new ArrayList(); + for (Map.Entry partitionEntry : entry.getValue().entrySet()) { + PartitionResponse part = partitionEntry.getValue(); + Struct partStruct = topicData.instance(PARTITION_RESPONSES_KEY_NAME) + .set(PARTITION_KEY_NAME, partitionEntry.getKey()) + .set(ERROR_CODE_KEY_NAME, part.errorCode) + .set(BASE_OFFSET_KEY_NAME, part.baseOffset); + partitionArray.add(partStruct); + } + topicData.set(PARTITION_RESPONSES_KEY_NAME, partitionArray.toArray()); + topicDatas.add(topicData); + } + struct.set(RESPONSES_KEY_NAME, topicDatas.toArray()); + this.responses = responses; + } + + public ProduceResponse(Struct struct) { + super(struct); + responses = new HashMap(); + for (Object topicResponse : struct.getArray("responses")) { + Struct topicRespStruct = (Struct) topicResponse; + String topic = topicRespStruct.getString("topic"); + for (Object partResponse : topicRespStruct.getArray("partition_responses")) { + Struct partRespStruct = (Struct) partResponse; + int partition = partRespStruct.getInt("partition"); + short errorCode = partRespStruct.getShort("error_code"); + long offset = partRespStruct.getLong("base_offset"); + TopicPartition 
tp = new TopicPartition(topic, partition); + responses.put(tp, new PartitionResponse(errorCode, offset)); + } + } + } + + public Map responses() { + return this.responses; + } + + public static final class PartitionResponse { + public short errorCode; + public long baseOffset; + + public PartitionResponse(short errorCode, long baseOffset) { + this.errorCode = errorCode; + this.baseOffset = baseOffset; + } + + @Override + public String toString() { + StringBuilder b = new StringBuilder(); + b.append('{'); + b.append("error: "); + b.append(errorCode); + b.append(",offset: "); + b.append(baseOffset); + b.append('}'); + return b.toString(); + } + } + + public static ProduceResponse parse(ByteBuffer buffer) { + return new ProduceResponse(((Struct) curSchema.read(buffer))); + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/RequestHeader.java b/clients/src/main/java/org/apache/kafka/common/requests/RequestHeader.java index 457abb1ad44ed..f459a2a62f7b9 100644 --- a/clients/src/main/java/org/apache/kafka/common/requests/RequestHeader.java +++ b/clients/src/main/java/org/apache/kafka/common/requests/RequestHeader.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
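ProduceRequest hands each partition an opaque ByteBuffer of records, and ProduceResponse returns an error code plus base offset per partition. The sketch below illustrates that shape; it is not part of the patch, the topic name and offsets are placeholders, and the record set is deliberately empty.

    import java.nio.ByteBuffer;
    import java.util.Collections;
    import org.apache.kafka.common.TopicPartition;
    import org.apache.kafka.common.requests.ProduceRequest;
    import org.apache.kafka.common.requests.ProduceResponse;

    public class ProduceSketch {
        public static void main(String[] args) {
            TopicPartition tp = new TopicPartition("my-topic", 0);
            // acks=1, 1000 ms timeout, and an empty record set for one partition.
            ProduceRequest request = new ProduceRequest((short) 1, 1000,
                Collections.singletonMap(tp, ByteBuffer.allocate(0)));
            System.out.println(request.acks() + " / " + request.timeout());

            // The broker's answer: no error, records appended starting at offset 100.
            ProduceResponse response = new ProduceResponse(
                Collections.singletonMap(tp, new ProduceResponse.PartitionResponse((short) 0, 100L)));
            System.out.println(response.responses().get(tp)); // prints {error: 0,offset: 100}
        }
    }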
*/ package org.apache.kafka.common.requests; @@ -25,22 +21,27 @@ import org.apache.kafka.common.protocol.types.Field; import org.apache.kafka.common.protocol.types.Struct; - /** * The header for a request in the Kafka protocol */ -public class RequestHeader { +public class RequestHeader extends AbstractRequestResponse { private static Field API_KEY_FIELD = REQUEST_HEADER.get("api_key"); private static Field API_VERSION_FIELD = REQUEST_HEADER.get("api_version"); private static Field CLIENT_ID_FIELD = REQUEST_HEADER.get("client_id"); private static Field CORRELATION_ID_FIELD = REQUEST_HEADER.get("correlation_id"); - private final Struct header; + private final short apiKey; + private final short apiVersion; + private final String clientId; + private final int correlationId; public RequestHeader(Struct header) { - super(); - this.header = header; + super(header); + apiKey = struct.getShort(API_KEY_FIELD); + apiVersion = struct.getShort(API_VERSION_FIELD); + clientId = struct.getString(CLIENT_ID_FIELD); + correlationId = struct.getInt(CORRELATION_ID_FIELD); } public RequestHeader(short apiKey, String client, int correlation) { @@ -48,38 +49,34 @@ public RequestHeader(short apiKey, String client, int correlation) { } public RequestHeader(short apiKey, short version, String client, int correlation) { - this(new Struct(Protocol.REQUEST_HEADER)); - this.header.set(API_KEY_FIELD, apiKey); - this.header.set(API_VERSION_FIELD, version); - this.header.set(CLIENT_ID_FIELD, client); - this.header.set(CORRELATION_ID_FIELD, correlation); + super(new Struct(Protocol.REQUEST_HEADER)); + struct.set(API_KEY_FIELD, apiKey); + struct.set(API_VERSION_FIELD, version); + struct.set(CLIENT_ID_FIELD, client); + struct.set(CORRELATION_ID_FIELD, correlation); + this.apiKey = apiKey; + this.apiVersion = version; + this.clientId = client; + this.correlationId = correlation; } public short apiKey() { - return (Short) this.header.get(API_KEY_FIELD); + return apiKey; } public short apiVersion() { - return (Short) this.header.get(API_VERSION_FIELD); + return apiVersion; } public String clientId() { - return (String) this.header.get(CLIENT_ID_FIELD); + return clientId; } public int correlationId() { - return (Integer) this.header.get(CORRELATION_ID_FIELD); + return correlationId; } public static RequestHeader parse(ByteBuffer buffer) { return new RequestHeader((Struct) Protocol.REQUEST_HEADER.read(buffer)); } - - public void writeTo(ByteBuffer buffer) { - header.writeTo(buffer); - } - - public int sizeOf() { - return header.sizeOf(); - } } diff --git a/clients/src/main/java/org/apache/kafka/common/requests/RequestSend.java b/clients/src/main/java/org/apache/kafka/common/requests/RequestSend.java index c5e9020b2e769..27cbf390c7f14 100644 --- a/clients/src/main/java/org/apache/kafka/common/requests/RequestSend.java +++ b/clients/src/main/java/org/apache/kafka/common/requests/RequestSend.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
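With RequestHeader now extending AbstractRequestResponse, its fields are cached once at construction, and writeTo/sizeOf are presumably inherited from the base class (NetworkClientTest later in the patch exercises that pair on ResponseHeader). A brief, hedged sketch of a header round trip, with a placeholder client id and correlation id:

    import java.nio.ByteBuffer;
    import org.apache.kafka.common.protocol.ApiKeys;
    import org.apache.kafka.common.requests.RequestHeader;

    public class RequestHeaderSketch {
        public static void main(String[] args) {
            RequestHeader header = new RequestHeader(ApiKeys.PRODUCE.id, "example-client", 7);
            ByteBuffer buffer = ByteBuffer.allocate(header.sizeOf());
            header.writeTo(buffer);
            buffer.flip();
            RequestHeader echoed = RequestHeader.parse(buffer);
            // Accessors now return the cached fields instead of re-reading the underlying Struct.
            System.out.println(echoed.clientId() + " correlation " + echoed.correlationId());
        }
    }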
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.requests; @@ -21,7 +17,6 @@ import org.apache.kafka.common.network.NetworkSend; import org.apache.kafka.common.protocol.types.Struct; - /** * A send object for a kafka request */ @@ -52,4 +47,9 @@ public Struct body() { return body; } + @Override + public String toString() { + return "RequestSend(header=" + header.toString() + ", body=" + body.toString() + ")"; + } + } diff --git a/clients/src/main/java/org/apache/kafka/common/requests/ResponseHeader.java b/clients/src/main/java/org/apache/kafka/common/requests/ResponseHeader.java index 257b8287757e4..dd63853e15f50 100644 --- a/clients/src/main/java/org/apache/kafka/common/requests/ResponseHeader.java +++ b/clients/src/main/java/org/apache/kafka/common/requests/ResponseHeader.java @@ -28,31 +28,25 @@ /** * A response header in the kafka protocol. */ -public class ResponseHeader { +public class ResponseHeader extends AbstractRequestResponse { private static Field CORRELATION_KEY_FIELD = RESPONSE_HEADER.get("correlation_id"); - private final Struct header; + private final int correlationId; public ResponseHeader(Struct header) { - this.header = header; + super(header); + correlationId = struct.getInt(CORRELATION_KEY_FIELD); } public ResponseHeader(int correlationId) { - this(new Struct(Protocol.RESPONSE_HEADER)); - this.header.set(CORRELATION_KEY_FIELD, correlationId); + super(new Struct(Protocol.RESPONSE_HEADER)); + struct.set(CORRELATION_KEY_FIELD, correlationId); + this.correlationId = correlationId; } public int correlationId() { - return (Integer) header.get(CORRELATION_KEY_FIELD); - } - - public void writeTo(ByteBuffer buffer) { - header.writeTo(buffer); - } - - public int sizeOf() { - return header.sizeOf(); + return correlationId; } public static ResponseHeader parse(ByteBuffer buffer) { diff --git a/clients/src/main/java/org/apache/kafka/common/utils/ClientUtils.java b/clients/src/main/java/org/apache/kafka/common/utils/ClientUtils.java new file mode 100644 index 0000000000000..b987e7f0434c6 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/utils/ClientUtils.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. 
The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.common.utils; + +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.List; + +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.config.ConfigException; + +import static org.apache.kafka.common.utils.Utils.getHost; +import static org.apache.kafka.common.utils.Utils.getPort; + +public class ClientUtils { + + public static List parseAndValidateAddresses(List urls) { + List addresses = new ArrayList(); + for (String url : urls) { + if (url != null && url.length() > 0) { + String host = getHost(url); + Integer port = getPort(url); + if (host == null || port == null) + throw new ConfigException("Invalid url in " + ProducerConfig.BOOTSTRAP_SERVERS_CONFIG + ": " + url); + try { + InetSocketAddress address = new InetSocketAddress(host, port); + if (address.isUnresolved()) + throw new ConfigException("DNS resolution failed for url in " + ProducerConfig.BOOTSTRAP_SERVERS_CONFIG + ": " + url); + addresses.add(address); + } catch (NumberFormatException e) { + throw new ConfigException("Invalid port in " + ProducerConfig.BOOTSTRAP_SERVERS_CONFIG + ": " + url); + } + } + } + if (addresses.size() < 1) + throw new ConfigException("No bootstrap urls given in " + ProducerConfig.BOOTSTRAP_SERVERS_CONFIG); + return addresses; + } +} \ No newline at end of file diff --git a/clients/src/main/java/org/apache/kafka/common/utils/CollectionUtils.java b/clients/src/main/java/org/apache/kafka/common/utils/CollectionUtils.java new file mode 100644 index 0000000000000..ba3863734391b --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/common/utils/CollectionUtils.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ +package org.apache.kafka.common.utils; + +import org.apache.kafka.common.TopicPartition; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class CollectionUtils { + /** + * group data by topic + * @param data Data to be partitioned + * @param Partition data type + * @return partitioned data + */ + public static Map> groupDataByTopic(Map data) { + Map> dataByTopic = new HashMap>(); + for (Map.Entry entry: data.entrySet()) { + String topic = entry.getKey().topic(); + int partition = entry.getKey().partition(); + Map topicData = dataByTopic.get(topic); + if (topicData == null) { + topicData = new HashMap(); + dataByTopic.put(topic, topicData); + } + topicData.put(partition, entry.getValue()); + } + return dataByTopic; + } + + /** + * group partitions by topic + * @param partitions + * @return partitions per topic + */ + public static Map> groupDataByTopic(List partitions) { + Map> partitionsByTopic = new HashMap>(); + for (TopicPartition tp: partitions) { + String topic = tp.topic(); + List topicData = partitionsByTopic.get(topic); + if (topicData == null) { + topicData = new ArrayList(); + partitionsByTopic.put(topic, topicData); + } + topicData.add(tp.partition()); + } + return partitionsByTopic; + } +} diff --git a/clients/src/main/java/org/apache/kafka/common/utils/CopyOnWriteMap.java b/clients/src/main/java/org/apache/kafka/common/utils/CopyOnWriteMap.java index 187d22fba5fe2..9c0e81ad80669 100644 --- a/clients/src/main/java/org/apache/kafka/common/utils/CopyOnWriteMap.java +++ b/clients/src/main/java/org/apache/kafka/common/utils/CopyOnWriteMap.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
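Both groupDataByTopic overloads above perform the regrouping the request and response builders rely on: flat TopicPartition-keyed input in, a nested topic-to-partition map (or topic-to-partition list) out. A small sketch, not part of the patch, with throwaway topic names:

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.Map;
    import org.apache.kafka.common.TopicPartition;
    import org.apache.kafka.common.utils.CollectionUtils;

    public class GroupByTopicSketch {
        public static void main(String[] args) {
            Map<TopicPartition, Long> offsets = new HashMap<TopicPartition, Long>();
            offsets.put(new TopicPartition("foo", 0), 10L);
            offsets.put(new TopicPartition("foo", 1), 20L);
            offsets.put(new TopicPartition("bar", 0), 5L);
            // e.g. {bar={0=5}, foo={0=10, 1=20}} (HashMap iteration order)
            System.out.println(CollectionUtils.groupDataByTopic(offsets));

            // e.g. {bar=[0], foo=[0]}
            System.out.println(CollectionUtils.groupDataByTopic(Arrays.asList(
                new TopicPartition("foo", 0), new TopicPartition("bar", 0))));
        }
    }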
*/ package org.apache.kafka.common.utils; @@ -24,7 +20,7 @@ import java.util.concurrent.ConcurrentMap; /** - * A simple read-optimized map implementation that synchronizes only writes and does a fully copy on each modification + * A simple read-optimized map implementation that synchronizes only writes and does a full copy on each modification */ public class CopyOnWriteMap implements ConcurrentMap { diff --git a/clients/src/main/java/org/apache/kafka/common/utils/Crc32.java b/clients/src/main/java/org/apache/kafka/common/utils/Crc32.java index 153c5a6d34529..047ca98ef6dd0 100644 --- a/clients/src/main/java/org/apache/kafka/common/utils/Crc32.java +++ b/clients/src/main/java/org/apache/kafka/common/utils/Crc32.java @@ -28,6 +28,30 @@ */ public class Crc32 implements Checksum { + /** + * Compute the CRC32 of the byte array + * + * @param bytes The array to compute the checksum for + * @return The CRC32 + */ + public static long crc32(byte[] bytes) { + return crc32(bytes, 0, bytes.length); + } + + /** + * Compute the CRC32 of the segment of the byte array given by the specified size and offset + * + * @param bytes The bytes to checksum + * @param offset the offset at which to begin checksumming + * @param size the number of bytes to checksum + * @return The CRC32 + */ + public static long crc32(byte[] bytes, int offset, int size) { + Crc32 crc = new Crc32(); + crc.update(bytes, offset, size); + return crc.getValue(); + } + /** the current CRC value, bit-flipped */ private int crc; @@ -97,6 +121,18 @@ final public void update(int b) { crc = (crc >>> 8) ^ T[T8_0_start + ((crc ^ b) & 0xff)]; } + /** + * Update the CRC32 given an integer + */ + final public void updateInt(int input) { + update((byte) (input >> 24)); + update((byte) (input >> 16)); + update((byte) (input >> 8)); + update((byte) input /* >> 0 */); + } + + + /* * CRC-32 lookup tables generated by the polynomial 0xEDB88320. See also TestPureJavaCrc32.Table. */ diff --git a/clients/src/main/java/org/apache/kafka/common/utils/KafkaThread.java b/clients/src/main/java/org/apache/kafka/common/utils/KafkaThread.java index 9ff793f38d70d..57247c85ba13b 100644 --- a/clients/src/main/java/org/apache/kafka/common/utils/KafkaThread.java +++ b/clients/src/main/java/org/apache/kafka/common/utils/KafkaThread.java @@ -1,32 +1,33 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + /** * A wrapper for Thread that sets things up nicely */ public class KafkaThread extends Thread { - public KafkaThread(String name, Runnable runnable, boolean daemon) { + private final Logger log = LoggerFactory.getLogger(getClass()); + + public KafkaThread(final String name, Runnable runnable, boolean daemon) { super(runnable, name); setDaemon(daemon); setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { public void uncaughtException(Thread t, Throwable e) { - e.printStackTrace(); + log.error("Uncaught exception in " + name + ": ", e); } }); } diff --git a/clients/src/main/java/org/apache/kafka/common/utils/SystemTime.java b/clients/src/main/java/org/apache/kafka/common/utils/SystemTime.java index 6582c73dab634..d682bd46ec382 100644 --- a/clients/src/main/java/org/apache/kafka/common/utils/SystemTime.java +++ b/clients/src/main/java/org/apache/kafka/common/utils/SystemTime.java @@ -26,6 +26,7 @@ public long milliseconds() { return System.currentTimeMillis(); } + @Override public long nanoseconds() { return System.nanoTime(); } diff --git a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java index 9c34e7dc82f33..527dd0f9c47fc 100644 --- a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java +++ b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java @@ -1,29 +1,33 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
*/ package org.apache.kafka.common.utils; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.kafka.common.KafkaException; - public class Utils { + private static final Pattern HOST_PORT_PATTERN = Pattern.compile("\\[?(.+?)\\]?:(\\d+)"); + + public static String NL = System.getProperty("line.separator"); + /** * Turn the given UTF8 byte array into a string * @@ -73,6 +77,34 @@ public static long readUnsignedInt(ByteBuffer buffer, int index) { return buffer.getInt(index) & 0xffffffffL; } + /** + * Read an unsigned integer stored in little-endian format from the {@link InputStream}. + * + * @param in The stream to read from + * @return The integer read (MUST BE TREATED WITH SPECIAL CARE TO AVOID SIGNEDNESS) + */ + public static int readUnsignedIntLE(InputStream in) throws IOException { + return (in.read() << 8*0) + | (in.read() << 8*1) + | (in.read() << 8*2) + | (in.read() << 8*3); + } + + /** + * Read an unsigned integer stored in little-endian format from a byte array + * at a given offset. + * + * @param buffer The byte array to read from + * @param offset The position in buffer to read from + * @return The integer read (MUST BE TREATED WITH SPECIAL CARE TO AVOID SIGNEDNESS) + */ + public static int readUnsignedIntLE(byte[] buffer, int offset) { + return (buffer[offset++] << 8*0) + | (buffer[offset++] << 8*1) + | (buffer[offset++] << 8*2) + | (buffer[offset] << 8*3); + } + /** * Write the given long value as a 4 byte unsigned integer. Overflow is ignored. * @@ -95,29 +127,34 @@ public static void writeUnsignedInt(ByteBuffer buffer, int index, long value) { } /** - * Compute the CRC32 of the byte array + * Write an unsigned integer in little-endian format to the {@link OutputStream}. * - * @param bytes The array to compute the checksum for - * @return The CRC32 + * @param out The stream to write to + * @param value The value to write */ - public static long crc32(byte[] bytes) { - return crc32(bytes, 0, bytes.length); + public static void writeUnsignedIntLE(OutputStream out, int value) throws IOException { + out.write(value >>> 8*0); + out.write(value >>> 8*1); + out.write(value >>> 8*2); + out.write(value >>> 8*3); } /** - * Compute the CRC32 of the segment of the byte array given by the specificed size and offset + * Write an unsigned integer in little-endian format to a byte array + * at a given offset. * - * @param bytes The bytes to checksum - * @param offset the offset at which to begin checksumming - * @param size the number of bytes to checksum - * @return The CRC32 + * @param buffer The byte array to write to + * @param offset The position in buffer to write to + * @param value The value to write */ - public static long crc32(byte[] bytes, int offset, int size) { - Crc32 crc = new Crc32(); - crc.update(bytes, offset, size); - return crc.getValue(); + public static void writeUnsignedIntLE(byte[] buffer, int offset, int value) { + buffer[offset++] = (byte) (value >>> 8*0); + buffer[offset++] = (byte) (value >>> 8*1); + buffer[offset++] = (byte) (value >>> 8*2); + buffer[offset] = (byte) (value >>> 8*3); } + /** * Get the absolute value of the given number. If the number is Int.MinValue return 0. This is different from * java.lang.Math.abs or scala.math.abs in that they return Int.MinValue (!). 
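The little-endian helpers added above, together with the static Crc32.crc32 shortcut introduced earlier in Crc32.java, can be exercised as in the following sketch; it is not part of the patch and uses throwaway values.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import org.apache.kafka.common.utils.Crc32;
    import org.apache.kafka.common.utils.Utils;

    public class LittleEndianSketch {
        public static void main(String[] args) throws IOException {
            // Write 260 (0x104) little-endian into an array: bytes 04 01 00 00.
            byte[] scratch = new byte[4];
            Utils.writeUnsignedIntLE(scratch, 0, 260);

            // The stream variants round-trip the same value.
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            Utils.writeUnsignedIntLE(out, 260);
            int back = Utils.readUnsignedIntLE(new ByteArrayInputStream(out.toByteArray())); // 260

            // One-shot checksum helper, equivalent to update()/getValue() on a fresh Crc32.
            System.out.println(back + " crc=" + Crc32.crc32(scratch));
        }
    }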
@@ -244,4 +281,36 @@ public static int murmur2(final byte[] data) { return h; } + /** + * Extracts the hostname from a "host:port" address string. + * @param address address string to parse + * @return hostname or null if the given address is incorrect + */ + public static String getHost(String address) { + Matcher matcher = HOST_PORT_PATTERN.matcher(address); + return matcher.matches() ? matcher.group(1) : null; + } + + /** + * Extracts the port number from a "host:port" address string. + * @param address address string to parse + * @return port number or null if the given address is incorrect + */ + public static Integer getPort(String address) { + Matcher matcher = HOST_PORT_PATTERN.matcher(address); + return matcher.matches() ? Integer.parseInt(matcher.group(2)) : null; + } + + /** + * Formats hostname and port number as a "host:port" address string, + * surrounding IPv6 addresses with braces '[', ']' + * @param host hostname + * @param port port number + * @return address string + */ + public static String formatAddress(String host, Integer port) { + return host.contains(":") + ? "[" + host + "]:" + port // IPv6 + : host + ":" + port; + } } diff --git a/clients/src/test/java/org/apache/kafka/clients/MockClient.java b/clients/src/test/java/org/apache/kafka/clients/MockClient.java new file mode 100644 index 0000000000000..47b5d4ac1f2a5 --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/clients/MockClient.java @@ -0,0 +1,101 @@ +package org.apache.kafka.clients; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Queue; +import java.util.Set; + +import org.apache.kafka.common.Node; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.requests.RequestHeader; +import org.apache.kafka.common.utils.Time; + +/** + * A mock network client for use testing code + */ +public class MockClient implements KafkaClient { + + private final Time time; + private int correlation = 0; + private final Set ready = new HashSet(); + private final Queue requests = new ArrayDeque(); + private final Queue responses = new ArrayDeque(); + + public MockClient(Time time) { + this.time = time; + } + + @Override + public boolean isReady(Node node, long now) { + return ready.contains(node.id()); + } + + @Override + public boolean ready(Node node, long now) { + boolean found = isReady(node, now); + ready.add(node.id()); + return found; + } + + @Override + public long connectionDelay(Node node, long now) { + return 0; + } + + public void disconnect(Integer node) { + Iterator iter = requests.iterator(); + while (iter.hasNext()) { + ClientRequest request = iter.next(); + if (request.request().destination() == node) { + responses.add(new ClientResponse(request, time.milliseconds(), true, null)); + iter.remove(); + } + } + ready.remove(node); + } + + @Override + public List poll(List requests, long timeoutMs, long now) { + this.requests.addAll(requests); + List copy = new ArrayList(this.responses); + this.responses.clear(); + return copy; + } + + public Queue requests() { + return this.requests; + } + + public void respond(Struct body) { + ClientRequest request = requests.remove(); + responses.add(new ClientResponse(request, time.milliseconds(), false, body)); + } + + @Override + public int inFlightRequestCount() { + return requests.size(); + } + + @Override + public RequestHeader nextRequestHeader(ApiKeys key) { + return new 
RequestHeader(key.id, "mock", correlation++); + } + + @Override + public void wakeup() { + } + + @Override + public void close() { + } + + @Override + public Node leastLoadedNode(long now) { + return null; + } + +} diff --git a/clients/src/test/java/org/apache/kafka/clients/NetworkClientTest.java b/clients/src/test/java/org/apache/kafka/clients/NetworkClientTest.java new file mode 100644 index 0000000000000..1a55242e9399f --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/clients/NetworkClientTest.java @@ -0,0 +1,101 @@ +package org.apache.kafka.clients; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.kafka.clients.producer.internals.Metadata; +import org.apache.kafka.common.Cluster; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.network.NetworkReceive; +import org.apache.kafka.common.protocol.ApiKeys; +import org.apache.kafka.common.protocol.ProtoUtils; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.requests.MetadataRequest; +import org.apache.kafka.common.requests.ProduceRequest; +import org.apache.kafka.common.requests.RequestHeader; +import org.apache.kafka.common.requests.RequestSend; +import org.apache.kafka.common.requests.ResponseHeader; +import org.apache.kafka.common.utils.MockTime; +import org.apache.kafka.test.MockSelector; +import org.apache.kafka.test.TestUtils; +import org.junit.Before; +import org.junit.Test; + +public class NetworkClientTest { + + private MockTime time = new MockTime(); + private MockSelector selector = new MockSelector(time); + private Metadata metadata = new Metadata(0, Long.MAX_VALUE); + private int nodeId = 1; + private Cluster cluster = TestUtils.singletonCluster("test", nodeId); + private Node node = cluster.nodes().get(0); + private NetworkClient client = new NetworkClient(selector, metadata, "mock", Integer.MAX_VALUE, 0, 64 * 1024, 64 * 1024); + + @Before + public void setup() { + metadata.update(cluster, time.milliseconds()); + } + + @Test + public void testReadyAndDisconnect() { + List reqs = new ArrayList(); + assertFalse("Client begins unready as it has no connection.", client.ready(node, time.milliseconds())); + assertEquals("The connection is established as a side-effect of the readiness check", 1, selector.connected().size()); + client.poll(reqs, 1, time.milliseconds()); + selector.clear(); + assertTrue("Now the client is ready", client.ready(node, time.milliseconds())); + selector.disconnect(node.id()); + client.poll(reqs, 1, time.milliseconds()); + selector.clear(); + assertFalse("After we forced the disconnection the client is no longer ready.", client.ready(node, time.milliseconds())); + assertTrue("Metadata should get updated.", metadata.timeToNextUpdate(time.milliseconds()) == 0); + } + + @Test(expected = IllegalStateException.class) + public void testSendToUnreadyNode() { + RequestSend send = new RequestSend(5, + client.nextRequestHeader(ApiKeys.METADATA), + new MetadataRequest(Arrays.asList("test")).toStruct()); + ClientRequest request = new ClientRequest(time.milliseconds(), false, send, null); + client.poll(Arrays.asList(request), 1, time.milliseconds()); + } + + @Test + public void testSimpleRequestResponse() { + ProduceRequest produceRequest = new 
ProduceRequest((short) 1, 1000, Collections.emptyMap()); + RequestHeader reqHeader = client.nextRequestHeader(ApiKeys.PRODUCE); + RequestSend send = new RequestSend(node.id(), reqHeader, produceRequest.toStruct()); + ClientRequest request = new ClientRequest(time.milliseconds(), true, send, null); + awaitReady(client, node); + client.poll(Arrays.asList(request), 1, time.milliseconds()); + assertEquals(1, client.inFlightRequestCount()); + ResponseHeader respHeader = new ResponseHeader(reqHeader.correlationId()); + Struct resp = new Struct(ProtoUtils.currentResponseSchema(ApiKeys.PRODUCE.id)); + resp.set("responses", new Object[0]); + int size = respHeader.sizeOf() + resp.sizeOf(); + ByteBuffer buffer = ByteBuffer.allocate(size); + respHeader.writeTo(buffer); + resp.writeTo(buffer); + buffer.flip(); + selector.completeReceive(new NetworkReceive(node.id(), buffer)); + List responses = client.poll(new ArrayList(), 1, time.milliseconds()); + assertEquals(1, responses.size()); + ClientResponse response = responses.get(0); + assertTrue("Should have a response body.", response.hasResponse()); + assertEquals("Should be correlated to the original request", request, response.request()); + } + + private void awaitReady(NetworkClient client, Node node) { + while (!client.ready(node, time.milliseconds())) + client.poll(new ArrayList(), 1, time.milliseconds()); + } + +} diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/ConsumerExampleTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/ConsumerExampleTest.java new file mode 100644 index 0000000000000..29ad25e90606f --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/ConsumerExampleTest.java @@ -0,0 +1,297 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. +*/ +package org.apache.kafka.clients.consumer; + +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import org.apache.kafka.common.TopicPartition; +import org.junit.Test; + +/** + * TODO: Clean this after the consumer implementation is complete. Until then, it is useful to write some sample test code using the new APIs + * + */ +public class ConsumerExampleTest { + /** + * This example demonstrates how to use the consumer to leverage Kafka's group management functionality for automatic consumer load + * balancing and failure detection. 
This example assumes that the offsets are stored in Kafka and are automatically committed periodically, + * as controlled by the auto.commit.interval.ms config + */ +// @Test +// public void testConsumerGroupManagementWithAutoOffsetCommits() { +// Properties props = new Properties(); +// props.put("metadata.broker.list", "localhost:9092"); +// props.put("group.id", "test"); +// props.put("session.timeout.ms", "1000"); +// props.put("auto.commit.enable", "true"); +// props.put("auto.commit.interval.ms", "10000"); +// KafkaConsumer consumer = new KafkaConsumer(props); +// // subscribe to some topics +// consumer.subscribe("foo", "bar"); +// boolean isRunning = true; +// while(isRunning) { +// Map records = consumer.poll(100); +// process(records); +// } +// consumer.close(); +// } + + /** + * This example demonstrates how to use the consumer to leverage Kafka's group management functionality for automatic consumer load + * balancing and failure detection. This example assumes that the offsets are stored in Kafka and are manually committed using the + * commit() API. This example also demonstrates rewinding the consumer's offsets if processing of consumed messages fails. + */ +// @Test +// public void testConsumerGroupManagementWithManualOffsetCommit() { +// Properties props = new Properties(); +// props.put("metadata.broker.list", "localhost:9092"); +// props.put("group.id", "test"); +// props.put("session.timeout.ms", "1000"); +// props.put("auto.commit.enable", "false"); +// KafkaConsumer consumer = new KafkaConsumer(props); +// // subscribe to some topics +// consumer.subscribe("foo", "bar"); +// int commitInterval = 100; +// int numRecords = 0; +// boolean isRunning = true; +// Map consumedOffsets = new HashMap(); +// while(isRunning) { +// Map records = consumer.poll(100); +// try { +// Map lastConsumedOffsets = process(records); +// consumedOffsets.putAll(lastConsumedOffsets); +// numRecords += records.size(); +// // commit offsets for all partitions of topics foo, bar synchronously, owned by this consumer instance +// if(numRecords % commitInterval == 0) +// consumer.commit(true); +// } catch(Exception e) { +// // rewind consumer's offsets for failed partitions +// List failedPartitions = getFailedPartitions(); +// Map offsetsToRewindTo = new HashMap(); +// for(TopicPartition failedPartition : failedPartitions) { +// // rewind to the last consumed offset for the failed partition. Since process() failed for this partition, the consumed offset +// // should still be pointing to the last successfully processed offset and hence is the right offset to rewind consumption to. +// offsetsToRewindTo.put(failedPartition, consumedOffsets.get(failedPartition)); +// } +// // seek to new offsets only for partitions that failed the last process() +// consumer.seek(offsetsToRewindTo); +// } +// } +// consumer.close(); +// } + + private List getFailedPartitions() { return null; } + + /** + * This example demonstrates the consumer can be used to leverage Kafka's group management functionality along with custom offset storage. + * In this example, the assumption made is that the user chooses to store the consumer offsets outside Kafka. This requires the user to + * plugin logic for retrieving the offsets from a custom store and provide the offsets to the consumer in the ConsumerRebalanceCallback + * callback. The onPartitionsAssigned callback is invoked after the consumer is assigned a new set of partitions on rebalance and + * before the consumption restarts post rebalance. 
This is the right place to supply offsets from a custom store to the consumer. + */ +// @Test +// public void testConsumerRebalanceWithCustomOffsetStore() { +// Properties props = new Properties(); +// props.put("metadata.broker.list", "localhost:9092"); +// props.put("group.id", "test"); +// props.put("session.timeout.ms", "1000"); +// props.put("auto.commit.enable", "true"); +// props.put("auto.commit.interval.ms", "10000"); +// KafkaConsumer consumer = new KafkaConsumer(props, +// new ConsumerRebalanceCallback() { +// public void onPartitionsAssigned(Consumer consumer, Collection partitions) { +// Map lastCommittedOffsets = getLastCommittedOffsetsFromCustomStore(partitions); +// consumer.seek(lastCommittedOffsets); +// } +// public void onPartitionsRevoked(Consumer consumer, Collection partitions) { +// Map offsets = getLastConsumedOffsets(partitions); // implemented by the user +// commitOffsetsToCustomStore(offsets); // implemented by the user +// } +// private Map getLastCommittedOffsetsFromCustomStore(Collection partitions) { +// return null; +// } +// private Map getLastConsumedOffsets(Collection partitions) { return null; } +// private void commitOffsetsToCustomStore(Map offsets) {} +// }); +// // subscribe to topics +// consumer.subscribe("foo", "bar"); +// int commitInterval = 100; +// int numRecords = 0; +// boolean isRunning = true; +// while(isRunning) { +// Map records = consumer.poll(100); +// Map consumedOffsets = process(records); +// numRecords += records.size(); +// // commit offsets for all partitions of topics foo, bar synchronously, owned by this consumer instance +// if(numRecords % commitInterval == 0) +// commitOffsetsToCustomStore(consumedOffsets); +// } +// consumer.close(); +// } + + /** + * This example demonstrates how the consumer can be used to leverage Kafka's group management functionality along with Kafka based offset storage. + * In this example, the assumption made is that the user chooses to use Kafka based offset management. 
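The commented-out example that follows spells this rewind pattern out in full; a condensed, uncommented sketch of the same idea, using only the prototype consumer calls exercised in this file (committed(), seek(), commit(true), and ConsumerRebalanceCallback), might look like the following. The generic parameters and the fixed rewind of 100 messages are illustrative assumptions, not part of the prototype.

    // Sketch only: the KafkaConsumer API below is the in-progress prototype this
    // test file experiments with, not a finished interface. Offsets are stored in
    // Kafka; on every rebalance each newly assigned partition is rewound
    // 100 messages (an arbitrary illustrative number) behind its committed offset.
    KafkaConsumer consumer = new KafkaConsumer(props, new ConsumerRebalanceCallback() {
        public void onPartitionsAssigned(Consumer consumer, Collection<TopicPartition> partitions) {
            Map<TopicPartition, Long> committed = consumer.committed(null);
            Map<TopicPartition, Long> rewound = new HashMap<TopicPartition, Long>();
            for (Map.Entry<TopicPartition, Long> entry : committed.entrySet())
                rewound.put(entry.getKey(), entry.getValue() - 100);
            consumer.seek(rewound);
        }
        public void onPartitionsRevoked(Consumer consumer, Collection<TopicPartition> partitions) {
            consumer.commit(true); // commit synchronously to Kafka before losing ownership
        }
    });
    consumer.subscribe("foo", "bar");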
+ */ +// @Test +// public void testConsumerRewindWithGroupManagementAndKafkaOffsetStorage() { +// Properties props = new Properties(); +// props.put("metadata.broker.list", "localhost:9092"); +// props.put("group.id", "test"); +// props.put("session.timeout.ms", "1000"); +// props.put("auto.commit.enable", "false"); +// KafkaConsumer consumer = new KafkaConsumer(props, +// new ConsumerRebalanceCallback() { +// boolean rewindOffsets = true; +// public void onPartitionsAssigned(Consumer consumer, Collection partitions) { +// if(rewindOffsets) { +// Map latestCommittedOffsets = consumer.committed(null); +// Map newOffsets = rewindOffsets(latestCommittedOffsets, 100); +// consumer.seek(newOffsets); +// } +// } +// public void onPartitionsRevoked(Consumer consumer, Collection partitions) { +// consumer.commit(true); +// } +// // this API rewinds every partition back by numberOfMessagesToRewindBackTo messages +// private Map rewindOffsets(Map currentOffsets, +// long numberOfMessagesToRewindBackTo) { +// Map newOffsets = new HashMap(); +// for(Map.Entry offset : currentOffsets.entrySet()) { +// newOffsets.put(offset.getKey(), offset.getValue() - numberOfMessagesToRewindBackTo); +// } +// return newOffsets; +// } +// }); +// // subscribe to topics +// consumer.subscribe("foo", "bar"); +// int commitInterval = 100; +// int numRecords = 0; +// boolean isRunning = true; +// while(isRunning) { +// Map records = consumer.poll(100); +// Map consumedOffsets = process(records); +// numRecords += records.size(); +// // commit offsets for all partitions of topics foo, bar synchronously, owned by this consumer instance +// if(numRecords % commitInterval == 0) +// commitOffsetsToCustomStore(consumedOffsets); +// } +// consumer.close(); +// } + + /** + * This example demonstrates how the consumer can be used to subscribe to specific partitions of certain topics and consume upto the latest + * available message for each of those partitions before shutting down. When used to subscribe to specific partitions, the user foregoes + * the group management functionality and instead relies on manually configuring the consumer instances to subscribe to a set of partitions. + * This example assumes that the user chooses to use Kafka based offset storage. The user still has to specify a group.id to use Kafka + * based offset management. However, session.timeout.ms is not required since the Kafka consumer only does failure detection with group + * management. 
+ */ +// @Test +// public void testConsumerWithKafkaBasedOffsetManagement() { +// Properties props = new Properties(); +// props.put("metadata.broker.list", "localhost:9092"); +// props.put("group.id", "test"); +// props.put("auto.commit.enable", "true"); +// props.put("auto.commit.interval.ms", "10000"); +// KafkaConsumer consumer = new KafkaConsumer(props); +// // subscribe to some partitions of topic foo +// TopicPartition partition0 = new TopicPartition("foo", 0); +// TopicPartition partition1 = new TopicPartition("foo", 1); +// TopicPartition[] partitions = new TopicPartition[2]; +// partitions[0] = partition0; +// partitions[1] = partition1; +// consumer.subscribe(partitions); +// // find the last committed offsets for partitions 0,1 of topic foo +// Map lastCommittedOffsets = consumer.committed(null); +// // seek to the last committed offsets to avoid duplicates +// consumer.seek(lastCommittedOffsets); +// // find the offsets of the latest available messages to know where to stop consumption +// Map latestAvailableOffsets = consumer.offsetsBeforeTime(-2, null); +// boolean isRunning = true; +// while(isRunning) { +// Map records = consumer.poll(100); +// Map consumedOffsets = process(records); +// for(TopicPartition partition : partitions) { +// if(consumedOffsets.get(partition) >= latestAvailableOffsets.get(partition)) +// isRunning = false; +// else +// isRunning = true; +// } +// } +// consumer.close(); +// } + + /** + * This example demonstrates how the consumer can be used to subscribe to specific partitions of certain topics and consume upto the latest + * available message for each of those partitions before shutting down. When used to subscribe to specific partitions, the user foregoes + * the group management functionality and instead relies on manually configuring the consumer instances to subscribe to a set of partitions. + * This example assumes that the user chooses to use custom offset storage. 
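Condensed from the prototype example that follows: the consumer is pointed at specific partitions, offsets live in a user-managed store, and consumption stops once every partition has reached the latest available offset. The helper names (readOffsetsFromStore, writeOffsetsToStore, process) stand in for user code, and the generic types are reconstructions; none of this is a finished API.

    // Sketch only (prototype consumer API, hypothetical helper methods).
    KafkaConsumer consumer = new KafkaConsumer(props);
    TopicPartition partition0 = new TopicPartition("foo", 0);
    TopicPartition partition1 = new TopicPartition("foo", 1);
    TopicPartition[] partitions = new TopicPartition[] { partition0, partition1 };
    consumer.subscribe(partitions);
    consumer.seek(readOffsetsFromStore(partitions));             // resume from the external store
    Map<TopicPartition, Long> latest = consumer.offsetsBeforeTime(-2, null); // -2 = latest available offsets
    boolean done = false;
    while (!done) {
        Map records = consumer.poll(100);
        Map<TopicPartition, Long> consumed = process(records);   // user processing; returns last consumed offsets
        writeOffsetsToStore(consumed);
        done = true;
        for (TopicPartition tp : partitions)
            if (consumed.get(tp) < latest.get(tp))
                done = false;
    }
    consumer.close();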
+ */ + @Test + public void testConsumerWithCustomOffsetManagement() { +// Properties props = new Properties(); +// props.put("metadata.broker.list", "localhost:9092"); +// KafkaConsumer consumer = new KafkaConsumer(props); +// // subscribe to some partitions of topic foo +// TopicPartition partition0 = new TopicPartition("foo", 0); +// TopicPartition partition1 = new TopicPartition("foo", 1); +// TopicPartition[] partitions = new TopicPartition[2]; +// partitions[0] = partition0; +// partitions[1] = partition1; +// consumer.subscribe(partitions); +// Map lastCommittedOffsets = getLastCommittedOffsetsFromCustomStore(); +// // seek to the last committed offsets to avoid duplicates +// consumer.seek(lastCommittedOffsets); +// // find the offsets of the latest available messages to know where to stop consumption +// Map latestAvailableOffsets = consumer.offsetsBeforeTime(-2, null); +// boolean isRunning = true; +// while(isRunning) { +// Map records = consumer.poll(100); +// Map consumedOffsets = process(records); +// // commit offsets for partitions 0,1 for topic foo to custom store +// commitOffsetsToCustomStore(consumedOffsets); +// for(TopicPartition partition : partitions) { +// if(consumedOffsets.get(partition) >= latestAvailableOffsets.get(partition)) +// isRunning = false; +// else +// isRunning = true; +// } +// } +// consumer.close(); + } + + private Map getLastCommittedOffsetsFromCustomStore() { return null; } + private void commitOffsetsToCustomStore(Map consumedOffsets) {} + private Map process(Map records) { + Map processedOffsets = new HashMap(); + for(Entry recordMetadata : records.entrySet()) { + List recordsPerTopic = recordMetadata.getValue().records(); + for(int i = 0;i < recordsPerTopic.size();i++) { + ConsumerRecord record = recordsPerTopic.get(i); + // process record + try { + processedOffsets.put(record.topicAndPartition(), record.offset()); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + return processedOffsets; + } +} diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/BufferPoolTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/BufferPoolTest.java index f227b5c3ff365..fe3c13f319d48 100644 --- a/clients/src/test/java/org/apache/kafka/clients/producer/BufferPoolTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/producer/BufferPoolTest.java @@ -16,9 +16,11 @@ */ package org.apache.kafka.clients.producer; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import org.apache.kafka.clients.producer.internals.BufferPool; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.utils.MockTime; +import org.apache.kafka.test.TestUtils; +import org.junit.Test; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -26,13 +28,11 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; - -import org.apache.kafka.clients.producer.BufferExhaustedException; -import org.apache.kafka.clients.producer.internals.BufferPool; -import org.apache.kafka.test.TestUtils; -import org.junit.Test; +import static org.junit.Assert.*; public class BufferPoolTest { + private MockTime time = new MockTime(); + private Metrics metrics = new Metrics(time); /** * Test the simple non-blocking allocation paths @@ -41,7 +41,7 @@ public class BufferPoolTest { public void testSimple() throws Exception { int totalMemory = 64 * 1024; int size = 1024; - BufferPool pool = new BufferPool(totalMemory, size, 
false); + BufferPool pool = new BufferPool(totalMemory, size, false, metrics, time); ByteBuffer buffer = pool.allocate(size); assertEquals("Buffer size should equal requested size.", size, buffer.limit()); assertEquals("Unallocated memory should have shrunk", totalMemory - size, pool.unallocatedMemory()); @@ -68,7 +68,7 @@ public void testSimple() throws Exception { */ @Test(expected = IllegalArgumentException.class) public void testCantAllocateMoreMemoryThanWeHave() throws Exception { - BufferPool pool = new BufferPool(1024, 512, true); + BufferPool pool = new BufferPool(1024, 512, true, metrics, time); ByteBuffer buffer = pool.allocate(1024); assertEquals(1024, buffer.limit()); pool.deallocate(buffer); @@ -77,7 +77,7 @@ public void testCantAllocateMoreMemoryThanWeHave() throws Exception { @Test public void testNonblockingMode() throws Exception { - BufferPool pool = new BufferPool(2, 1, false); + BufferPool pool = new BufferPool(2, 1, false, metrics, time); pool.allocate(1); try { pool.allocate(2); @@ -92,7 +92,7 @@ public void testNonblockingMode() throws Exception { */ @Test public void testDelayedAllocation() throws Exception { - BufferPool pool = new BufferPool(5 * 1024, 1024, true); + BufferPool pool = new BufferPool(5 * 1024, 1024, true, metrics, time); ByteBuffer buffer = pool.allocate(1024); CountDownLatch doDealloc = asyncDeallocate(pool, buffer); CountDownLatch allocation = asyncAllocate(pool, 5 * 1024); @@ -141,7 +141,7 @@ public void testStressfulSituation() throws Exception { final int iterations = 50000; final int poolableSize = 1024; final int totalMemory = numThreads / 2 * poolableSize; - final BufferPool pool = new BufferPool(totalMemory, poolableSize, true); + final BufferPool pool = new BufferPool(totalMemory, poolableSize, true, metrics, time); List threads = new ArrayList(); for (int i = 0; i < numThreads; i++) threads.add(new StressTestThread(pool, iterations)); diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/MetadataTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/MetadataTest.java index 09a5355d25a3b..4547bfcb44be4 100644 --- a/clients/src/test/java/org/apache/kafka/clients/producer/MetadataTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/producer/MetadataTest.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.clients.producer; @@ -21,6 +17,7 @@ import org.apache.kafka.clients.producer.internals.Metadata; import org.apache.kafka.common.Cluster; +import org.apache.kafka.common.errors.TimeoutException; import org.apache.kafka.test.TestUtils; import org.junit.Test; @@ -34,11 +31,11 @@ public class MetadataTest { public void testMetadata() throws Exception { long time = 0; metadata.update(Cluster.empty(), time); - assertFalse("No update needed.", metadata.needsUpdate(time)); - metadata.forceUpdate(); - assertFalse("Still no updated needed due to backoff", metadata.needsUpdate(time)); + assertFalse("No update needed.", metadata.timeToNextUpdate(time) == 0); + metadata.requestUpdate(); + assertFalse("Still no updated needed due to backoff", metadata.timeToNextUpdate(time) == 0); time += refreshBackoffMs; - assertTrue("Update needed now that backoff time expired", metadata.needsUpdate(time)); + assertTrue("Update needed now that backoff time expired", metadata.timeToNextUpdate(time) == 0); String topic = "my-topic"; Thread t1 = asyncFetch(topic); Thread t2 = asyncFetch(topic); @@ -47,15 +44,21 @@ public void testMetadata() throws Exception { metadata.update(TestUtils.singletonCluster(topic, 1), time); t1.join(); t2.join(); - assertFalse("No update needed.", metadata.needsUpdate(time)); + assertFalse("No update needed.", metadata.timeToNextUpdate(time) == 0); time += metadataExpireMs; - assertTrue("Update needed due to stale metadata.", metadata.needsUpdate(time)); + assertTrue("Update needed due to stale metadata.", metadata.timeToNextUpdate(time) == 0); } private Thread asyncFetch(final String topic) { Thread thread = new Thread() { public void run() { - metadata.fetch(topic, Integer.MAX_VALUE); + while (metadata.fetch().partitionsForTopic(topic) == null) { + try { + metadata.awaitUpdate(metadata.requestUpdate(), refreshBackoffMs); + } catch(TimeoutException e) { + // let it go + } + } } }; thread.start(); diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/MockProducerTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/MockProducerTest.java index 9a9411fc900bf..1e2ca03fafa36 100644 --- a/clients/src/test/java/org/apache/kafka/clients/producer/MockProducerTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/producer/MockProducerTest.java @@ -37,7 +37,7 @@ public class MockProducerTest { @Test public void testAutoCompleteMock() throws Exception { MockProducer producer = new MockProducer(true); - ProducerRecord record = new ProducerRecord(topic, "key".getBytes(), "value".getBytes()); + ProducerRecord record = new ProducerRecord(topic, "key".getBytes(), "value".getBytes()); Future metadata = producer.send(record); assertTrue("Send should be immediately complete", metadata.isDone()); assertFalse("Send should be successful", isError(metadata)); @@ -51,8 +51,8 @@ public void testAutoCompleteMock() throws Exception { @Test public void testManualCompletion() throws Exception { MockProducer producer = new MockProducer(false); - ProducerRecord record1 = new ProducerRecord("topic", "key1".getBytes(), "value1".getBytes()); - 
ProducerRecord record2 = new ProducerRecord("topic", "key2".getBytes(), "value2".getBytes()); + ProducerRecord record1 = new ProducerRecord("topic", "key1".getBytes(), "value1".getBytes()); + ProducerRecord record2 = new ProducerRecord("topic", "key2".getBytes(), "value2".getBytes()); Future md1 = producer.send(record1); assertFalse("Send shouldn't have completed", md1.isDone()); Future md2 = producer.send(record2); diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/PartitionerTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/PartitionerTest.java index c78da64b5ad7b..1d077fd4c56bf 100644 --- a/clients/src/test/java/org/apache/kafka/clients/producer/PartitionerTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/producer/PartitionerTest.java @@ -18,6 +18,7 @@ import static java.util.Arrays.asList; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotSame; import static org.junit.Assert.assertTrue; import java.util.List; @@ -49,23 +50,32 @@ public class PartitionerTest { public void testUserSuppliedPartitioning() { assertEquals("If the user supplies a partition we should use it.", 0, - partitioner.partition(new ProducerRecord("test", 0, key, value), cluster)); + partitioner.partition(new ProducerRecord("test", 0, key, value), cluster)); } @Test public void testKeyPartitionIsStable() { - int partition = partitioner.partition(new ProducerRecord("test", key, value), cluster); + int partition = partitioner.partition(new ProducerRecord("test", key, value), cluster); assertEquals("Same key should yield same partition", partition, - partitioner.partition(new ProducerRecord("test", key, "value2".getBytes()), cluster)); + partitioner.partition(new ProducerRecord("test", key, "value2".getBytes()), cluster)); + } + + @Test + public void testRoundRobinIsStable() { + int startPart = partitioner.partition(new ProducerRecord("test", value), cluster); + for (int i = 1; i <= 100; i++) { + int partition = partitioner.partition(new ProducerRecord("test", value), cluster); + assertEquals("Should yield a different partition each call with round-robin partitioner", + partition, (startPart + i) % 2); + } } @Test public void testRoundRobinWithDownNode() { for (int i = 0; i < partitions.size(); i++) { - int part = partitioner.partition(new ProducerRecord("test", value), cluster); + int part = partitioner.partition(new ProducerRecord("test", value), cluster); assertTrue("We should never choose a leader-less node in round robin", part >= 0 && part < 2); - } } } diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/RecordAccumulatorTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/RecordAccumulatorTest.java index 1bbe83c1bfd75..2c9932401d573 100644 --- a/clients/src/test/java/org/apache/kafka/clients/producer/RecordAccumulatorTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/producer/RecordAccumulatorTest.java @@ -1,33 +1,35 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.clients.producer; import static java.util.Arrays.asList; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertFalse; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.List; - +import java.util.Set; import org.apache.kafka.clients.producer.internals.RecordAccumulator; import org.apache.kafka.clients.producer.internals.RecordBatch; +import org.apache.kafka.common.Cluster; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.metrics.Metrics; import org.apache.kafka.common.record.CompressionType; @@ -39,25 +41,37 @@ public class RecordAccumulatorTest { - private TopicPartition tp = new TopicPartition("test", 0); + private String topic = "test"; + private int partition1 = 0; + private int partition2 = 1; + private int partition3 = 2; + private Node node1 = new Node(0, "localhost", 1111); + private Node node2 = new Node(1, "localhost", 1112); + private TopicPartition tp1 = new TopicPartition(topic, partition1); + private TopicPartition tp2 = new TopicPartition(topic, partition2); + private TopicPartition tp3 = new TopicPartition(topic, partition3); + private PartitionInfo part1 = new PartitionInfo(topic, partition1, node1, null, null); + private PartitionInfo part2 = new PartitionInfo(topic, partition2, node1, null, null); + private PartitionInfo part3 = new PartitionInfo(topic, partition3, node2, null, null); private MockTime time = new MockTime(); private byte[] key = "key".getBytes(); private byte[] value = "value".getBytes(); private int msgSize = Records.LOG_OVERHEAD + Record.recordSize(key, value); + private Cluster cluster = new Cluster(Arrays.asList(node1, node2), Arrays.asList(part1, part2, part3)); private Metrics metrics = new Metrics(time); @Test public void testFull() throws Exception { long now = time.milliseconds(); - RecordAccumulator accum = new RecordAccumulator(1024, 10 * 1024, 10L, false, metrics, time); + RecordAccumulator accum = new RecordAccumulator(1024, 10 * 1024, 10L, 100L, false, metrics, time); int appends = 1024 / msgSize; for (int i = 0; i < appends; i++) { - 
accum.append(tp, key, value, CompressionType.NONE, null); - assertEquals("No partitions should be ready.", 0, accum.ready(now).size()); + accum.append(tp1, key, value, CompressionType.NONE, null); + assertEquals("No partitions should be ready.", 0, accum.ready(cluster, now).readyNodes.size()); } - accum.append(tp, key, value, CompressionType.NONE, null); - assertEquals("Our partition should be ready", asList(tp), accum.ready(time.milliseconds())); - List batches = accum.drain(asList(tp), Integer.MAX_VALUE); + accum.append(tp1, key, value, CompressionType.NONE, null); + assertEquals("Our partition's leader should be ready", Collections.singleton(node1), accum.ready(cluster, time.milliseconds()).readyNodes); + List batches = accum.drain(cluster, Collections.singleton(node1), Integer.MAX_VALUE, 0).get(node1.id()); assertEquals(1, batches.size()); RecordBatch batch = batches.get(0); Iterator iter = batch.records.iterator(); @@ -72,20 +86,20 @@ public void testFull() throws Exception { @Test public void testAppendLarge() throws Exception { int batchSize = 512; - RecordAccumulator accum = new RecordAccumulator(batchSize, 10 * 1024, 0L, false, metrics, time); - accum.append(tp, key, new byte[2 * batchSize], CompressionType.NONE, null); - assertEquals("Our partition should be ready", asList(tp), accum.ready(time.milliseconds())); + RecordAccumulator accum = new RecordAccumulator(batchSize, 10 * 1024, 0L, 100L, false, metrics, time); + accum.append(tp1, key, new byte[2 * batchSize], CompressionType.NONE, null); + assertEquals("Our partition's leader should be ready", Collections.singleton(node1), accum.ready(cluster, time.milliseconds()).readyNodes); } @Test public void testLinger() throws Exception { long lingerMs = 10L; - RecordAccumulator accum = new RecordAccumulator(1024, 10 * 1024, lingerMs, false, metrics, time); - accum.append(tp, key, value, CompressionType.NONE, null); - assertEquals("No partitions should be ready", 0, accum.ready(time.milliseconds()).size()); + RecordAccumulator accum = new RecordAccumulator(1024, 10 * 1024, lingerMs, 100L, false, metrics, time); + accum.append(tp1, key, value, CompressionType.NONE, null); + assertEquals("No partitions should be ready", 0, accum.ready(cluster, time.milliseconds()).readyNodes.size()); time.sleep(10); - assertEquals("Our partition should be ready", asList(tp), accum.ready(time.milliseconds())); - List batches = accum.drain(asList(tp), Integer.MAX_VALUE); + assertEquals("Our partition's leader should be ready", Collections.singleton(node1), accum.ready(cluster, time.milliseconds()).readyNodes); + List batches = accum.drain(cluster, Collections.singleton(node1), Integer.MAX_VALUE, 0).get(node1.id()); assertEquals(1, batches.size()); RecordBatch batch = batches.get(0); Iterator iter = batch.records.iterator(); @@ -97,16 +111,16 @@ public void testLinger() throws Exception { @Test public void testPartialDrain() throws Exception { - RecordAccumulator accum = new RecordAccumulator(1024, 10 * 1024, 10L, false, metrics, time); + RecordAccumulator accum = new RecordAccumulator(1024, 10 * 1024, 10L, 100L, false, metrics, time); int appends = 1024 / msgSize + 1; - List partitions = asList(new TopicPartition("test", 0), new TopicPartition("test", 1)); + List partitions = asList(tp1, tp2); for (TopicPartition tp : partitions) { for (int i = 0; i < appends; i++) accum.append(tp, key, value, CompressionType.NONE, null); } - assertEquals("Both partitions should be ready", 2, accum.ready(time.milliseconds()).size()); + assertEquals("Partition's leader 
should be ready", Collections.singleton(node1), accum.ready(cluster, time.milliseconds()).readyNodes); - List batches = accum.drain(partitions, 1024); + List batches = accum.drain(cluster, Collections.singleton(node1), 1024, 0).get(node1.id()); assertEquals("But due to size bound only one partition should have been retrieved", 1, batches.size()); } @@ -114,15 +128,15 @@ public void testPartialDrain() throws Exception { public void testStressfulSituation() throws Exception { final int numThreads = 5; final int msgs = 10000; - final int numParts = 10; - final RecordAccumulator accum = new RecordAccumulator(1024, 10 * 1024, 0L, true, metrics, time); + final int numParts = 2; + final RecordAccumulator accum = new RecordAccumulator(1024, 10 * 1024, 0L, 100L, true, metrics, time); List threads = new ArrayList(); for (int i = 0; i < numThreads; i++) { threads.add(new Thread() { public void run() { for (int i = 0; i < msgs; i++) { try { - accum.append(new TopicPartition("test", i % numParts), key, value, CompressionType.NONE, null); + accum.append(new TopicPartition(topic, i % numParts), key, value, CompressionType.NONE, null); } catch (Exception e) { e.printStackTrace(); } @@ -135,17 +149,54 @@ public void run() { int read = 0; long now = time.milliseconds(); while (read < numThreads * msgs) { - List tps = accum.ready(now); - List batches = accum.drain(tps, 5 * 1024); - for (RecordBatch batch : batches) { - for (LogEntry entry : batch.records) - read++; + Set nodes = accum.ready(cluster, now).readyNodes; + List batches = accum.drain(cluster, nodes, 5 * 1024, 0).get(node1.id()); + if (batches != null) { + for (RecordBatch batch : batches) { + for (LogEntry entry : batch.records) + read++; + accum.deallocate(batch); + } } - accum.deallocate(batches); } for (Thread t : threads) t.join(); } + + @Test + public void testNextReadyCheckDelay() throws Exception { + // Next check time will use lingerMs since this test won't trigger any retries/backoff + long lingerMs = 10L; + RecordAccumulator accum = new RecordAccumulator(1024, 10 * 1024, lingerMs, 100L, false, metrics, time); + // Just short of going over the limit so we trigger linger time + int appends = 1024 / msgSize; + + // Partition on node1 only + for (int i = 0; i < appends; i++) + accum.append(tp1, key, value, CompressionType.NONE, null); + RecordAccumulator.ReadyCheckResult result = accum.ready(cluster, time.milliseconds()); + assertEquals("No nodes should be ready.", 0, result.readyNodes.size()); + assertEquals("Next check time should be the linger time", lingerMs, result.nextReadyCheckDelayMs); + + time.sleep(lingerMs / 2); + + // Add partition on node2 only + for (int i = 0; i < appends; i++) + accum.append(tp3, key, value, CompressionType.NONE, null); + result = accum.ready(cluster, time.milliseconds()); + assertEquals("No nodes should be ready.", 0, result.readyNodes.size()); + assertEquals("Next check time should be defined by node1, half remaining linger time", lingerMs / 2, result.nextReadyCheckDelayMs); + + // Add data for another partition on node1, enough to make data sendable immediately + for (int i = 0; i < appends+1; i++) + accum.append(tp2, key, value, CompressionType.NONE, null); + result = accum.ready(cluster, time.milliseconds()); + assertEquals("Node1 should be ready", Collections.singleton(node1), result.readyNodes); + // Note this can actually be < linger time because it may use delays from partitions that aren't sendable + // but have leaders with other sendable data. 
+ assertTrue("Next check time should be defined by node2, at most linger time", result.nextReadyCheckDelayMs <= lingerMs); + } + } diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/SenderTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/SenderTest.java index 41c028bffbda1..ef2ca65cabe97 100644 --- a/clients/src/test/java/org/apache/kafka/clients/producer/SenderTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/producer/SenderTest.java @@ -1,59 +1,65 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
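The RecordAccumulatorTest changes above introduce ready(cluster, now), which returns a ReadyCheckResult, and a per-node drain(); the SenderTest below wires a Sender to a MockClient that consumes them. A rough sketch of the loop those two pieces imply, assuming the accumulator, cluster, client and time objects are set up as in these tests and leaving request construction out:

    // Sketch only: not the actual Sender.run() implementation.
    int maxRequestSize = 1024 * 1024;
    long now = time.milliseconds();
    RecordAccumulator.ReadyCheckResult result = accum.ready(cluster, now);
    Set<Node> sendable = new HashSet<Node>();
    for (Node node : result.readyNodes)
        if (client.ready(node, now))          // also initiates the connection if one is missing
            sendable.add(node);
    // one batch list per broker id, respecting the max request size
    Map<Integer, List<RecordBatch>> batches = accum.drain(cluster, sendable, maxRequestSize, now);
    List<ClientRequest> requests = new ArrayList<ClientRequest>(); // built from 'batches' (omitted here)
    // nextReadyCheckDelayMs bounds how long we may block before more data could
    // become sendable, e.g. when linger time expires for a not-yet-full batch
    client.poll(requests, result.nextReadyCheckDelayMs, now);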
*/ package org.apache.kafka.clients.producer; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; -import java.nio.ByteBuffer; +import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; - -import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.clients.MockClient; import org.apache.kafka.clients.producer.internals.Metadata; import org.apache.kafka.clients.producer.internals.RecordAccumulator; import org.apache.kafka.clients.producer.internals.Sender; import org.apache.kafka.common.Cluster; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.metrics.Metrics; -import org.apache.kafka.common.network.NetworkReceive; import org.apache.kafka.common.protocol.ApiKeys; import org.apache.kafka.common.protocol.Errors; import org.apache.kafka.common.protocol.ProtoUtils; import org.apache.kafka.common.protocol.types.Struct; import org.apache.kafka.common.record.CompressionType; -import org.apache.kafka.common.requests.RequestSend; -import org.apache.kafka.common.requests.ResponseHeader; import org.apache.kafka.common.utils.MockTime; -import org.apache.kafka.test.MockSelector; import org.apache.kafka.test.TestUtils; import org.junit.Before; import org.junit.Test; public class SenderTest { + private static final int MAX_REQUEST_SIZE = 1024 * 1024; + private static final short ACKS_ALL = -1; + private static final int MAX_RETRIES = 0; + private static final int REQUEST_TIMEOUT_MS = 10000; + + private TopicPartition tp = new TopicPartition("test", 0); private MockTime time = new MockTime(); - private MockSelector selector = new MockSelector(time); + private MockClient client = new MockClient(time); private int batchSize = 16 * 1024; private Metadata metadata = new Metadata(0, Long.MAX_VALUE); private Cluster cluster = TestUtils.singletonCluster("test", 1); private Metrics metrics = new Metrics(time); - private RecordAccumulator accumulator = new RecordAccumulator(batchSize, 1024 * 1024, 0L, false, metrics, time); - private Sender sender = new Sender(selector, metadata, this.accumulator, "", 1024 * 1024, 0L, (short) -1, 10000, time); + private RecordAccumulator accumulator = new RecordAccumulator(batchSize, 1024 * 1024, 0L, 0L, false, metrics, time); + private Sender sender = new Sender(client, + metadata, + this.accumulator, + MAX_REQUEST_SIZE, + ACKS_ALL, + MAX_RETRIES, + REQUEST_TIMEOUT_MS, + metrics, + time); @Before public void setup() { @@ -62,28 +68,73 @@ public void setup() { @Test public void testSimple() throws Exception { - TopicPartition tp = new TopicPartition("test", 0); - Future future = accumulator.append(tp, "key".getBytes(), "value".getBytes(), CompressionType.NONE, null); + int offset = 0; + Future future = accumulator.append(tp, "key".getBytes(), "value".getBytes(), CompressionType.NONE, null).future; + sender.run(time.milliseconds()); // connect + sender.run(time.milliseconds()); // send produce request + assertEquals("We should have a single produce request in flight.", 1, client.inFlightRequestCount()); + client.respond(produceResponse(tp.topic(), tp.partition(), offset, Errors.NONE.code())); sender.run(time.milliseconds()); - assertEquals("We should have connected", 1, selector.connected().size()); - selector.clear(); - sender.run(time.milliseconds()); - assertEquals("Single request should be sent", 1, selector.completedSends().size()); - RequestSend request = (RequestSend) selector.completedSends().get(0); - selector.clear(); - long 
offset = 42; - selector.completeReceive(produceResponse(request.header().correlationId(), - cluster.leaderFor(tp).id(), - tp.topic(), - tp.partition(), - offset, - Errors.NONE.code())); + assertEquals("All requests completed.", offset, client.inFlightRequestCount()); sender.run(time.milliseconds()); assertTrue("Request should be completed", future.isDone()); assertEquals(offset, future.get().offset()); } - private NetworkReceive produceResponse(int correlation, int source, String topic, int part, long offset, int error) { + @Test + public void testRetries() throws Exception { + // create a sender with retries = 1 + int maxRetries = 1; + Sender sender = new Sender(client, + metadata, + this.accumulator, + MAX_REQUEST_SIZE, + ACKS_ALL, + maxRetries, + REQUEST_TIMEOUT_MS, + new Metrics(), + time); + // do a successful retry + Future future = accumulator.append(tp, "key".getBytes(), "value".getBytes(), CompressionType.NONE, null).future; + sender.run(time.milliseconds()); // connect + sender.run(time.milliseconds()); // send produce request + assertEquals(1, client.inFlightRequestCount()); + client.disconnect(client.requests().peek().request().destination()); + assertEquals(0, client.inFlightRequestCount()); + sender.run(time.milliseconds()); // receive error + sender.run(time.milliseconds()); // reconnect + sender.run(time.milliseconds()); // resend + assertEquals(1, client.inFlightRequestCount()); + int offset = 0; + client.respond(produceResponse(tp.topic(), tp.partition(), offset, Errors.NONE.code())); + sender.run(time.milliseconds()); + assertTrue("Request should have retried and completed", future.isDone()); + assertEquals(offset, future.get().offset()); + + // do an unsuccessful retry + future = accumulator.append(tp, "key".getBytes(), "value".getBytes(), CompressionType.NONE, null).future; + sender.run(time.milliseconds()); // send produce request + for (int i = 0; i < maxRetries + 1; i++) { + client.disconnect(client.requests().peek().request().destination()); + sender.run(time.milliseconds()); // receive error + sender.run(time.milliseconds()); // reconnect + sender.run(time.milliseconds()); // resend + } + sender.run(time.milliseconds()); + completedWithError(future, Errors.NETWORK_EXCEPTION); + } + + private void completedWithError(Future future, Errors error) throws Exception { + assertTrue("Request should be completed", future.isDone()); + try { + future.get(); + fail("Should have thrown an exception."); + } catch (ExecutionException e) { + assertEquals(error.exception().getClass(), e.getCause().getClass()); + } + } + + private Struct produceResponse(String topic, int part, long offset, int error) { Struct struct = new Struct(ProtoUtils.currentResponseSchema(ApiKeys.PRODUCE.id)); Struct response = struct.instance("responses"); response.set("topic", topic); @@ -93,12 +144,7 @@ private NetworkReceive produceResponse(int correlation, int source, String topic partResp.set("base_offset", offset); response.set("partition_responses", new Object[] { partResp }); struct.set("responses", new Object[] { response }); - ResponseHeader header = new ResponseHeader(correlation); - ByteBuffer buffer = ByteBuffer.allocate(header.sizeOf() + struct.sizeOf()); - header.writeTo(buffer); - struct.writeTo(buffer); - buffer.rewind(); - return new NetworkReceive(source, buffer); + return struct; } } diff --git a/clients/src/test/java/org/apache/kafka/common/config/AbstractConfigTest.java b/clients/src/test/java/org/apache/kafka/common/config/AbstractConfigTest.java new file mode 100644 index 
0000000000000..3cfd36d992f5f --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/common/config/AbstractConfigTest.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.common.config; + +import static org.junit.Assert.fail; + +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.apache.kafka.common.config.ConfigDef.Importance; +import org.apache.kafka.common.config.ConfigDef.Type; +import org.apache.kafka.common.metrics.KafkaMetric; +import org.apache.kafka.common.metrics.MetricsReporter; +import org.junit.Test; + +public class AbstractConfigTest { + + @Test + public void testConfiguredInstances() { + testValidInputs(""); + testValidInputs("org.apache.kafka.common.config.AbstractConfigTest$TestMetricsReporter"); + testValidInputs("org.apache.kafka.common.config.AbstractConfigTest$TestMetricsReporter,org.apache.kafka.common.config.AbstractConfigTest$TestMetricsReporter"); + testInvalidInputs(","); + testInvalidInputs("org.apache.kafka.clients.producer.unknown-metrics-reporter"); + testInvalidInputs("test1,test2"); + testInvalidInputs("org.apache.kafka.common.config.AbstractConfigTest$TestMetricsReporter,"); + } + + private void testValidInputs(String configValue) { + Properties props = new Properties(); + props.put(TestConfig.METRIC_REPORTER_CLASSES_CONFIG, configValue); + TestConfig config = new TestConfig(props); + try { + config.getConfiguredInstances(TestConfig.METRIC_REPORTER_CLASSES_CONFIG, + MetricsReporter.class); + } catch (ConfigException e) { + fail("No exceptions are expected here, valid props are :" + props); + } + } + + private void testInvalidInputs(String configValue) { + Properties props = new Properties(); + props.put(TestConfig.METRIC_REPORTER_CLASSES_CONFIG, configValue); + TestConfig config = new TestConfig(props); + try { + config.getConfiguredInstances(TestConfig.METRIC_REPORTER_CLASSES_CONFIG, + MetricsReporter.class); + fail("Expected a config exception due to invalid props :" + props); + } catch (ConfigException e) { + // this is good + } + } + + private static class TestConfig extends AbstractConfig { + + private static final ConfigDef config; + + public static final String METRIC_REPORTER_CLASSES_CONFIG = "metric.reporters"; + private static final String METRIC_REPORTER_CLASSES_DOC = "A list of classes to use as metrics reporters."; + + static { + config = new ConfigDef().define(METRIC_REPORTER_CLASSES_CONFIG, + Type.LIST, "", Importance.LOW, METRIC_REPORTER_CLASSES_DOC); + } + + public TestConfig(Map props) { + super(config, props); + } + } + + public static class TestMetricsReporter implements MetricsReporter { + + @Override + public void configure(Map configs) { + } + + @Override + public void init(List metrics) { +} + + @Override + public 
void metricChange(KafkaMetric metric) { + } + + @Override + public void close() { + } + } +} diff --git a/clients/src/test/java/org/apache/kafka/common/config/ConfigDefTest.java b/clients/src/test/java/org/apache/kafka/common/config/ConfigDefTest.java index 29543dfa34d5f..3c442a27a7ba3 100644 --- a/clients/src/test/java/org/apache/kafka/common/config/ConfigDefTest.java +++ b/clients/src/test/java/org/apache/kafka/common/config/ConfigDefTest.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
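The AbstractConfigTest added above and the ConfigDefTest changes that follow both exercise the reworked ConfigDef API, where every define() now carries an Importance and a default may be guarded by a validator. A minimal sketch of defining and parsing a config with it; the key names and values here are made up for illustration and only overloads visible in these tests are used:

    ConfigDef def = new ConfigDef()
        .define("my.retries", Type.INT, 0, Range.between(0, 10), Importance.HIGH, "bounded int with a default")
        .define("my.mode", Type.STRING, "sync", ValidString.in(Arrays.asList("sync", "async")), Importance.HIGH, "restricted string")
        .define("my.id", Type.STRING, "", Importance.LOW, "plain string with a default");

    Properties props = new Properties();
    props.put("my.retries", "3");                  // string values are coerced to the declared type
    Map<String, Object> parsed = def.parse(props); // unset keys fall back to their defaults
    // a value outside the Range or the ValidString set makes parse() throw ConfigException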
*/ package org.apache.kafka.common.config; @@ -20,14 +16,15 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; +import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.Properties; - -import org.apache.kafka.common.config.ConfigDef; -import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.common.config.ConfigDef.Importance; +import org.apache.kafka.common.config.ConfigDef.Validator; import org.apache.kafka.common.config.ConfigDef.Range; +import org.apache.kafka.common.config.ConfigDef.ValidString; import org.apache.kafka.common.config.ConfigDef.Type; import org.junit.Test; @@ -35,13 +32,15 @@ public class ConfigDefTest { @Test public void testBasicTypes() { - ConfigDef def = new ConfigDef().define("a", Type.INT, 5, Range.between(0, 14), "docs") - .define("b", Type.LONG, "docs") - .define("c", Type.STRING, "hello", "docs") - .define("d", Type.LIST, "docs") - .define("e", Type.DOUBLE, "docs") - .define("f", Type.CLASS, "docs") - .define("g", Type.BOOLEAN, "docs"); + ConfigDef def = new ConfigDef().define("a", Type.INT, 5, Range.between(0, 14), Importance.HIGH, "docs") + .define("b", Type.LONG, Importance.HIGH, "docs") + .define("c", Type.STRING, "hello", Importance.HIGH, "docs") + .define("d", Type.LIST, Importance.HIGH, "docs") + .define("e", Type.DOUBLE, Importance.HIGH, "docs") + .define("f", Type.CLASS, Importance.HIGH, "docs") + .define("g", Type.BOOLEAN, Importance.HIGH, "docs") + .define("h", Type.BOOLEAN, Importance.HIGH, "docs") + .define("i", Type.BOOLEAN, Importance.HIGH, "docs"); Properties props = new Properties(); props.put("a", "1 "); @@ -50,6 +49,8 @@ public void testBasicTypes() { props.put("e", 42.5d); props.put("f", String.class.getName()); props.put("g", "true"); + props.put("h", "FalSE"); + props.put("i", "TRUE"); Map vals = def.parse(props); assertEquals(1, vals.get("a")); @@ -59,26 +60,28 @@ public void testBasicTypes() { assertEquals(42.5d, vals.get("e")); assertEquals(String.class, vals.get("f")); assertEquals(true, vals.get("g")); + assertEquals(false, vals.get("h")); + assertEquals(true, vals.get("i")); } @Test(expected = ConfigException.class) public void testInvalidDefault() { - new ConfigDef().define("a", Type.INT, "hello", "docs"); + new ConfigDef().define("a", Type.INT, "hello", Importance.HIGH, "docs"); } @Test(expected = ConfigException.class) public void testNullDefault() { - new ConfigDef().define("a", Type.INT, null, null, "docs"); + new ConfigDef().define("a", Type.INT, null, null, null, "docs"); } @Test(expected = ConfigException.class) public void testMissingRequired() { - new ConfigDef().define("a", Type.INT, "docs").parse(new HashMap()); + new ConfigDef().define("a", Type.INT, Importance.HIGH, "docs").parse(new HashMap()); } @Test(expected = ConfigException.class) public void testDefinedTwice() { - new ConfigDef().define("a", Type.STRING, "docs").define("a", Type.INT, "docs"); + new ConfigDef().define("a", Type.STRING, Importance.HIGH, "docs").define("a", Type.INT, Importance.HIGH, "docs"); } @Test @@ -88,13 +91,14 @@ public void testBadInputs() { testBadInputs(Type.DOUBLE, "hello", null, new Object()); testBadInputs(Type.STRING, new Object()); testBadInputs(Type.LIST, 53, new Object()); + testBadInputs(Type.BOOLEAN, "hello", "truee", "fals"); } private void testBadInputs(Type type, Object... 
values) { for (Object value : values) { Map m = new HashMap(); m.put("name", value); - ConfigDef def = new ConfigDef().define("name", type, "docs"); + ConfigDef def = new ConfigDef().define("name", type, Importance.HIGH, "docs"); try { def.parse(m); fail("Expected a config exception on bad input for value " + value); @@ -103,4 +107,42 @@ private void testBadInputs(Type type, Object... values) { } } } + + @Test(expected = ConfigException.class) + public void testInvalidDefaultRange() { + ConfigDef def = new ConfigDef().define("name", Type.INT, -1, Range.between(0,10), Importance.HIGH, "docs"); + } + + @Test(expected = ConfigException.class) + public void testInvalidDefaultString() { + ConfigDef def = new ConfigDef().define("name", Type.STRING, "bad", ValidString.in(Arrays.asList("valid", "values")), Importance.HIGH, "docs"); + } + + @Test + public void testValidators() { + testValidators(Type.INT, Range.between(0,10), 5, new Object[]{1, 5, 9}, new Object[]{-1, 11}); + testValidators(Type.STRING, ValidString.in(Arrays.asList("good", "values", "default")), "default", + new Object[]{"good", "values", "default"}, new Object[]{"bad", "inputs"}); + } + + private void testValidators(Type type, Validator validator, Object defaultVal, Object[] okValues, Object[] badValues) { + ConfigDef def = new ConfigDef().define("name", type, defaultVal, validator, Importance.HIGH, "docs"); + + for (Object value : okValues) { + Map m = new HashMap(); + m.put("name", value); + def.parse(m); + } + + for (Object value : badValues) { + Map m = new HashMap(); + m.put("name", value); + try { + def.parse(m); + fail("Expected a config exception due to invalid value " + value); + } catch (ConfigException e) { + // this is good + } + } + } } diff --git a/clients/src/test/java/org/apache/kafka/common/metrics/MetricsTest.java b/clients/src/test/java/org/apache/kafka/common/metrics/MetricsTest.java index fdd89141579b6..19bea0f1fa1eb 100644 --- a/clients/src/test/java/org/apache/kafka/common/metrics/MetricsTest.java +++ b/clients/src/test/java/org/apache/kafka/common/metrics/MetricsTest.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.common.metrics; @@ -22,25 +18,16 @@ import java.util.Arrays; import java.util.concurrent.TimeUnit; - import org.apache.kafka.common.Metric; -import org.apache.kafka.common.metrics.JmxReporter; -import org.apache.kafka.common.metrics.Measurable; -import org.apache.kafka.common.metrics.MetricConfig; -import org.apache.kafka.common.metrics.Metrics; -import org.apache.kafka.common.metrics.MetricsReporter; -import org.apache.kafka.common.metrics.Quota; -import org.apache.kafka.common.metrics.QuotaViolationException; -import org.apache.kafka.common.metrics.Sensor; import org.apache.kafka.common.metrics.stats.Avg; import org.apache.kafka.common.metrics.stats.Count; import org.apache.kafka.common.metrics.stats.Max; import org.apache.kafka.common.metrics.stats.Min; import org.apache.kafka.common.metrics.stats.Percentile; import org.apache.kafka.common.metrics.stats.Percentiles; +import org.apache.kafka.common.metrics.stats.Percentiles.BucketSizing; import org.apache.kafka.common.metrics.stats.Rate; import org.apache.kafka.common.metrics.stats.Total; -import org.apache.kafka.common.metrics.stats.Percentiles.BucketSizing; import org.apache.kafka.common.utils.MockTime; import org.junit.Test; @@ -130,34 +117,35 @@ public void testBadSensorHiearchy() { public void testEventWindowing() { Count count = new Count(); MetricConfig config = new MetricConfig().eventWindow(1).samples(2); - count.record(config, 1.0, time.nanoseconds()); - count.record(config, 1.0, time.nanoseconds()); - assertEquals(2.0, count.measure(config, time.nanoseconds()), EPS); - count.record(config, 1.0, time.nanoseconds()); // first event times out - assertEquals(2.0, count.measure(config, time.nanoseconds()), EPS); + count.record(config, 1.0, time.milliseconds()); + count.record(config, 1.0, time.milliseconds()); + assertEquals(2.0, count.measure(config, time.milliseconds()), EPS); + count.record(config, 1.0, time.milliseconds()); // first event times out + assertEquals(2.0, count.measure(config, time.milliseconds()), EPS); } @Test public void testTimeWindowing() { Count count = new Count(); MetricConfig config = new MetricConfig().timeWindow(1, TimeUnit.MILLISECONDS).samples(2); - count.record(config, 1.0, time.nanoseconds()); + count.record(config, 1.0, time.milliseconds()); time.sleep(1); - count.record(config, 1.0, time.nanoseconds()); - assertEquals(2.0, count.measure(config, time.nanoseconds()), EPS); + count.record(config, 1.0, time.milliseconds()); + assertEquals(2.0, count.measure(config, time.milliseconds()), EPS); time.sleep(1); - count.record(config, 1.0, time.nanoseconds()); // oldest event times out - assertEquals(2.0, count.measure(config, time.nanoseconds()), EPS); + count.record(config, 1.0, time.milliseconds()); // oldest event times out + assertEquals(2.0, count.measure(config, time.milliseconds()), EPS); } @Test public void testOldDataHasNoEffect() { Max max = new Max(); long windowMs = 100; - MetricConfig config = new MetricConfig().timeWindow(windowMs, TimeUnit.MILLISECONDS); - max.record(config, 50, time.nanoseconds()); - time.sleep(windowMs); - 
assertEquals(Double.NEGATIVE_INFINITY, max.measure(config, time.nanoseconds()), EPS); + int samples = 2; + MetricConfig config = new MetricConfig().timeWindow(windowMs, TimeUnit.MILLISECONDS).samples(samples); + max.record(config, 50, time.milliseconds()); + time.sleep(samples * windowMs); + assertEquals(Double.NEGATIVE_INFINITY, max.measure(config, time.milliseconds()), EPS); } @Test(expected = IllegalArgumentException.class) diff --git a/clients/src/test/java/org/apache/kafka/common/network/SelectorTest.java b/clients/src/test/java/org/apache/kafka/common/network/SelectorTest.java index 865996c1fb5d8..5c5e3d40819e4 100644 --- a/clients/src/test/java/org/apache/kafka/common/network/SelectorTest.java +++ b/clients/src/test/java/org/apache/kafka/common/network/SelectorTest.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
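Aside on the testOldDataHasNoEffect change above: with sampled stats, a measurement spans all configured samples, so a recorded value only drops out once samples * windowMs has elapsed; that is why the revised test advances the mock clock by samples * windowMs rather than a single window before expecting Max to fall back to negative infinity. Below is a minimal sketch of that timeline, reusing only classes and calls that appear in this patch (MetricConfig, Max, MockTime); the class name and the intermediate read-out are illustrative assumptions, not part of the patch:

    import java.util.concurrent.TimeUnit;
    import org.apache.kafka.common.metrics.MetricConfig;
    import org.apache.kafka.common.metrics.stats.Max;
    import org.apache.kafka.common.utils.MockTime;

    public class SampledWindowSketch {
        public static void main(String[] args) {
            MockTime time = new MockTime();
            // two samples of 100 ms each: recorded data stays visible for up to samples * windowMs
            MetricConfig config = new MetricConfig().timeWindow(100, TimeUnit.MILLISECONDS).samples(2);
            Max max = new Max();
            max.record(config, 50, time.milliseconds());
            time.sleep(100);  // one window elapsed; the sample should still be inside the horizon (assumed, not asserted by the patch)
            System.out.println(max.measure(config, time.milliseconds()));  // expected 50.0
            time.sleep(100);  // samples * windowMs elapsed; the old sample is purged, as the test asserts
            System.out.println(max.measure(config, time.milliseconds()));  // expected -Infinity
        }
    }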
*/ package org.apache.kafka.common.network; @@ -27,16 +23,12 @@ import java.net.ServerSocket; import java.net.Socket; import java.nio.ByteBuffer; -import java.nio.channels.UnresolvedAddressException; import java.util.ArrayList; import java.util.Collections; import java.util.List; - -import org.apache.kafka.common.network.NetworkReceive; -import org.apache.kafka.common.network.NetworkSend; -import org.apache.kafka.common.network.Selectable; -import org.apache.kafka.common.network.Selector; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.utils.MockTime; import org.apache.kafka.common.utils.Utils; import org.apache.kafka.test.TestUtils; import org.junit.After; @@ -58,7 +50,7 @@ public class SelectorTest { public void setup() throws Exception { this.server = new EchoServer(); this.server.start(); - this.selector = new Selector(); + this.selector = new Selector(new Metrics(), new MockTime()); } @After @@ -125,7 +117,7 @@ public void testCantSendWithoutConnecting() throws Exception { /** * Sending a request to a node with a bad hostname should result in an exception during connect */ - @Test(expected = UnresolvedAddressException.class) + @Test(expected = IOException.class) public void testNoRouteToHost() throws Exception { selector.connect(0, new InetSocketAddress("asdf.asdf.dsc", server.port), BUFFER_SIZE, BUFFER_SIZE); } @@ -213,6 +205,12 @@ public void testEmptyRequest() throws Exception { assertEquals("", blockingRequest(node, "")); } + @Test(expected = IllegalStateException.class) + public void testExistingConnectionId() throws IOException { + blockingConnect(0); + blockingConnect(0); + } + private String blockingRequest(int node, String s) throws IOException { selector.poll(1000L, asList(createSend(node, s))); while (true) { diff --git a/clients/src/test/java/org/apache/kafka/common/record/MemoryRecordsTest.java b/clients/src/test/java/org/apache/kafka/common/record/MemoryRecordsTest.java index b0745b528cef9..94a11121e207d 100644 --- a/clients/src/test/java/org/apache/kafka/common/record/MemoryRecordsTest.java +++ b/clients/src/test/java/org/apache/kafka/common/record/MemoryRecordsTest.java @@ -22,29 +22,35 @@ import static org.junit.Assert.assertTrue; import java.nio.ByteBuffer; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; +import java.util.*; -import org.apache.kafka.common.record.LogEntry; -import org.apache.kafka.common.record.MemoryRecords; -import org.apache.kafka.common.record.Record; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +@RunWith(value = Parameterized.class) public class MemoryRecordsTest { + private CompressionType compression; + + public MemoryRecordsTest(CompressionType compression) { + this.compression = compression; + } + @Test public void testIterator() { - MemoryRecords recs1 = new MemoryRecords(ByteBuffer.allocate(1024)); - MemoryRecords recs2 = new MemoryRecords(ByteBuffer.allocate(1024)); + MemoryRecords recs1 = MemoryRecords.emptyRecords(ByteBuffer.allocate(1024), compression); + MemoryRecords recs2 = MemoryRecords.emptyRecords(ByteBuffer.allocate(1024), compression); List list = Arrays.asList(new Record("a".getBytes(), "1".getBytes()), new Record("b".getBytes(), "2".getBytes()), new Record("c".getBytes(), "3".getBytes())); for (int i = 0; i < list.size(); i++) { Record r = list.get(i); recs1.append(i, r); - recs2.append(i, toArray(r.key()), toArray(r.value()), r.compressionType()); + recs2.append(i, toArray(r.key()), toArray(r.value())); } + 
recs1.close(); + recs2.close(); for (int iteration = 0; iteration < 2; iteration++) { for (MemoryRecords recs : Arrays.asList(recs1, recs2)) { @@ -54,10 +60,18 @@ public void testIterator() { LogEntry entry = iter.next(); assertEquals((long) i, entry.offset()); assertEquals(list.get(i), entry.record()); + entry.record().ensureValid(); } assertFalse(iter.hasNext()); } } } + @Parameterized.Parameters + public static Collection data() { + List values = new ArrayList(); + for (CompressionType type: CompressionType.values()) + values.add(new Object[] { type }); + return values; + } } diff --git a/clients/src/test/java/org/apache/kafka/common/record/RecordTest.java b/clients/src/test/java/org/apache/kafka/common/record/RecordTest.java index ae54d67da9907..2765913d5bfd4 100644 --- a/clients/src/test/java/org/apache/kafka/common/record/RecordTest.java +++ b/clients/src/test/java/org/apache/kafka/common/record/RecordTest.java @@ -27,9 +27,6 @@ import java.util.Collection; import java.util.List; -import org.apache.kafka.common.record.CompressionType; -import org.apache.kafka.common.record.InvalidRecordException; -import org.apache.kafka.common.record.Record; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -66,6 +63,10 @@ public void testFields() { @Test public void testChecksum() { assertEquals(record.checksum(), record.computeChecksum()); + assertEquals(record.checksum(), record.computeChecksum( + this.key == null ? null : this.key.array(), + this.value == null ? null : this.value.array(), + this.compression, 0, -1)); assertTrue(record.isValid()); for (int i = Record.CRC_OFFSET + Record.CRC_LENGTH; i < record.size(); i++) { Record copy = copyOf(record); @@ -95,9 +96,11 @@ public void testEquality() { @Parameters public static Collection data() { + byte[] payload = new byte[1000]; + Arrays.fill(payload, (byte) 1); List values = new ArrayList(); - for (byte[] key : Arrays.asList(null, "".getBytes(), "key".getBytes())) - for (byte[] value : Arrays.asList(null, "".getBytes(), "value".getBytes())) + for (byte[] key : Arrays.asList(null, "".getBytes(), "key".getBytes(), payload)) + for (byte[] value : Arrays.asList(null, "".getBytes(), "value".getBytes(), payload)) for (CompressionType compression : CompressionType.values()) values.add(new Object[] { key, value, compression }); return values; diff --git a/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java b/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java new file mode 100644 index 0000000000000..df37fc6d8f0db --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java @@ -0,0 +1,173 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ + +package org.apache.kafka.common.requests; + +import org.apache.kafka.common.Cluster; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.PartitionInfo; +import org.apache.kafka.common.TopicPartition; +import org.junit.Test; + +import java.lang.reflect.Method; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +public class RequestResponseTest { + + @Test + public void testSerialization() throws Exception{ + List requestList = Arrays.asList( + createRequestHeader(), + createResponseHeader(), + createConsumerMetadataRequest(), + createConsumerMetadataResponse(), + createFetchRequest(), + createFetchResponse(), + createHeartBeatRequest(), + createHeartBeatResponse(), + createJoinGroupRequest(), + createJoinGroupResponse(), + createListOffsetRequest(), + createListOffsetResponse(), + createMetadataRequest(), + createMetadataResponse(), + createOffsetCommitRequest(), + createOffsetCommitResponse(), + createOffsetFetchRequest(), + createOffsetFetchResponse(), + createProduceRequest(), + createProduceResponse()); + + for (AbstractRequestResponse req: requestList) { + ByteBuffer buffer = ByteBuffer.allocate(req.sizeOf()); + req.writeTo(buffer); + buffer.rewind(); + Method deserializer = req.getClass().getDeclaredMethod("parse", ByteBuffer.class); + AbstractRequestResponse deserialized = (AbstractRequestResponse) deserializer.invoke(null, buffer); + assertEquals("The original and deserialized of " + req.getClass().getSimpleName() + " should be the same.", req, deserialized); + assertEquals("The original and deserialized of " + req.getClass().getSimpleName() + " should have the same hashcode.", + req.hashCode(), deserialized.hashCode()); + } + } + + private AbstractRequestResponse createRequestHeader() { + return new RequestHeader((short)10, (short)1, "", 10); + } + + private AbstractRequestResponse createResponseHeader() { + return new ResponseHeader(10); + } + + private AbstractRequestResponse createConsumerMetadataRequest() { + return new ConsumerMetadataRequest("test-group"); + } + + private AbstractRequestResponse createConsumerMetadataResponse() { + return new ConsumerMetadataResponse((short)1, new Node(10, "host1", 2014)); + } + + private AbstractRequestResponse createFetchRequest() { + Map fetchData = new HashMap(); + fetchData.put(new TopicPartition("test1", 0), new FetchRequest.PartitionData(100, 1000000)); + fetchData.put(new TopicPartition("test2", 0), new FetchRequest.PartitionData(200, 1000000)); + return new FetchRequest(-1, 100, 100000, fetchData); + } + + private AbstractRequestResponse createFetchResponse() { + Map responseData = new HashMap(); + responseData.put(new TopicPartition("test", 0), new FetchResponse.PartitionData((short)0, 1000000, ByteBuffer.allocate(10))); + return new FetchResponse(responseData); + } + + private AbstractRequestResponse createHeartBeatRequest() { + return new HeartbeatRequest("group1", 1, "consumer1"); + } + + private AbstractRequestResponse createHeartBeatResponse() { + return new HeartbeatResponse((short)0); + } + + private AbstractRequestResponse createJoinGroupRequest() { + return new JoinGroupRequest("group1", 30000, Arrays.asList("topic1"), "consumer1", "strategy1"); + } + + private AbstractRequestResponse createJoinGroupResponse() { + return new JoinGroupResponse((short)0, 1, "consumer1", Arrays.asList(new TopicPartition("test11", 1), new TopicPartition("test2", 1))); + } + + private 
AbstractRequestResponse createListOffsetRequest() { + Map offsetData = new HashMap(); + offsetData.put(new TopicPartition("test", 0), new ListOffsetRequest.PartitionData(1000000L, 10)); + return new ListOffsetRequest(-1, offsetData); + } + + private AbstractRequestResponse createListOffsetResponse() { + Map responseData = new HashMap(); + responseData.put(new TopicPartition("test", 0), new ListOffsetResponse.PartitionData((short)0, Arrays.asList(100L))); + return new ListOffsetResponse(responseData); + } + + private AbstractRequestResponse createMetadataRequest() { + return new MetadataRequest(Arrays.asList("topic1")); + } + + private AbstractRequestResponse createMetadataResponse() { + Node node = new Node(1, "host1", 1001); + Node[] replicas = new Node[1]; + replicas[0] = node; + Node[] isr = new Node[1]; + isr[0] = node; + Cluster cluster = new Cluster(Arrays.asList(node), Arrays.asList(new PartitionInfo("topic1", 1, node, replicas, isr))); + return new MetadataResponse(cluster); + } + + private AbstractRequestResponse createOffsetCommitRequest() { + Map commitData = new HashMap(); + commitData.put(new TopicPartition("test", 0), new OffsetCommitRequest.PartitionData(100, 1000000, "")); + return new OffsetCommitRequest("group1", 100, "consumer1", commitData); + } + + private AbstractRequestResponse createOffsetCommitResponse() { + Map responseData = new HashMap(); + responseData.put(new TopicPartition("test", 0), (short)0); + return new OffsetCommitResponse(responseData); + } + + private AbstractRequestResponse createOffsetFetchRequest() { + return new OffsetFetchRequest("group1", Arrays.asList(new TopicPartition("test11", 1))); + } + + private AbstractRequestResponse createOffsetFetchResponse() { + Map responseData = new HashMap(); + responseData.put(new TopicPartition("test", 0), new OffsetFetchResponse.PartitionData(100L, "", (short)0)); + return new OffsetFetchResponse(responseData); + } + + private AbstractRequestResponse createProduceRequest() { + Map produceData = new HashMap(); + produceData.put(new TopicPartition("test", 0), ByteBuffer.allocate(10)); + return new ProduceRequest((short)0, 5000, produceData); + } + + private AbstractRequestResponse createProduceResponse() { + Map responseData = new HashMap(); + responseData.put(new TopicPartition("test", 0), new ProduceResponse.PartitionResponse((short) 0, 10000)); + return new ProduceResponse(responseData); + } +} diff --git a/clients/src/test/java/org/apache/kafka/common/utils/AbstractIteratorTest.java b/clients/src/test/java/org/apache/kafka/common/utils/AbstractIteratorTest.java index 1df226606fad2..c788e66035b24 100644 --- a/clients/src/test/java/org/apache/kafka/common/utils/AbstractIteratorTest.java +++ b/clients/src/test/java/org/apache/kafka/common/utils/AbstractIteratorTest.java @@ -20,13 +20,8 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; +import java.util.*; -import org.apache.kafka.common.utils.AbstractIterator; import org.junit.Test; public class AbstractIteratorTest { @@ -49,7 +44,7 @@ public void testIterator() { @Test(expected = NoSuchElementException.class) public void testEmptyIterator() { - Iterator iter = new ListIterator(Arrays.asList()); + Iterator iter = new ListIterator(Collections.emptyList()); iter.next(); } diff --git a/clients/src/test/java/org/apache/kafka/common/utils/ClientUtilsTest.java 
b/clients/src/test/java/org/apache/kafka/common/utils/ClientUtilsTest.java new file mode 100644 index 0000000000000..6e37ea553f73d --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/common/utils/ClientUtilsTest.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.common.utils; + +import org.apache.kafka.common.config.ConfigException; +import org.junit.Test; + +import java.util.Arrays; + +public class ClientUtilsTest { + + @Test + public void testParseAndValidateAddresses() { + check("127.0.0.1:8000"); + check("mydomain.com:8080"); + check("[::1]:8000"); + check("[2001:db8:85a3:8d3:1319:8a2e:370:7348]:1234", "mydomain.com:10000"); + } + + @Test(expected = ConfigException.class) + public void testNoPort() { + check("127.0.0.1"); + } + + private void check(String... url) { + ClientUtils.parseAndValidateAddresses(Arrays.asList(url)); + } +} \ No newline at end of file diff --git a/clients/src/test/java/org/apache/kafka/common/utils/CrcTest.java b/clients/src/test/java/org/apache/kafka/common/utils/CrcTest.java new file mode 100644 index 0000000000000..6b323819390b5 --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/common/utils/CrcTest.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.common.utils; + +import static org.junit.Assert.assertEquals; +import org.junit.Test; + +import java.nio.ByteBuffer; + +public class CrcTest { + + @Test + public void testUpdate() { + final byte bytes[] = "Any String you want".getBytes(); + final int len = bytes.length; + + Crc32 crc1 = new Crc32(); + Crc32 crc2 = new Crc32(); + Crc32 crc3 = new Crc32(); + + crc1.update(bytes, 0, len); + for(int i = 0; i < len; i++) + crc2.update(bytes[i]); + crc3.update(bytes, 0, len/2); + crc3.update(bytes, len/2, len-len/2); + + assertEquals("Crc values should be the same", crc1.getValue(), crc2.getValue()); + assertEquals("Crc values should be the same", crc1.getValue(), crc3.getValue()); + } + + @Test + public void testUpdateInt() { + final int value = 1000; + final ByteBuffer buffer = ByteBuffer.allocate(4); + buffer.putInt(value); + + Crc32 crc1 = new Crc32(); + Crc32 crc2 = new Crc32(); + + crc1.updateInt(value); + crc2.update(buffer.array(), buffer.arrayOffset(), 4); + + assertEquals("Crc values should be the same", crc1.getValue(), crc2.getValue()); + } +} diff --git a/clients/src/test/java/org/apache/kafka/common/utils/MockTime.java b/clients/src/test/java/org/apache/kafka/common/utils/MockTime.java index cda8e644587aa..eb7fcf07e529e 100644 --- a/clients/src/test/java/org/apache/kafka/common/utils/MockTime.java +++ b/clients/src/test/java/org/apache/kafka/common/utils/MockTime.java @@ -1,25 +1,22 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
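The CrcTest added above pins down the incremental-update contract of the new Crc32 helper: feeding the whole array in one call, one byte at a time, or in two slices must yield the same checksum. The same property can be sanity-checked against the JDK's java.util.zip.CRC32; the snippet below is only an illustrative sketch of that contract (the class name is made up here) and is not part of the patch:

    import java.util.zip.CRC32;

    public class CrcSketch {
        public static void main(String[] args) {
            byte[] bytes = "Any String you want".getBytes();
            int len = bytes.length;

            CRC32 whole = new CRC32();
            whole.update(bytes, 0, len);                    // one-shot update

            CRC32 split = new CRC32();
            split.update(bytes, 0, len / 2);                // first half...
            split.update(bytes, len / 2, len - len / 2);    // ...then the rest

            // CRC-32 state is carried across update() calls, so both computations agree
            System.out.println(whole.getValue() == split.getValue());  // true
        }
    }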
*/ package org.apache.kafka.common.utils; import java.util.concurrent.TimeUnit; -import org.apache.kafka.common.utils.Time; - +/** + * A clock that you can manually advance by calling sleep + */ public class MockTime implements Time { private long nanos = 0; diff --git a/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java b/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java new file mode 100644 index 0000000000000..a39fab532f731 --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.common.utils; + +import org.junit.Test; + +import static org.apache.kafka.common.utils.Utils.getHost; +import static org.apache.kafka.common.utils.Utils.getPort; +import static org.apache.kafka.common.utils.Utils.formatAddress; +import static org.junit.Assert.*; + +public class UtilsTest { + + @Test + public void testGetHost() { + assertEquals("127.0.0.1", getHost("127.0.0.1:8000")); + assertEquals("mydomain.com", getHost("mydomain.com:8080")); + assertEquals("::1", getHost("[::1]:1234")); + assertEquals("2001:db8:85a3:8d3:1319:8a2e:370:7348", getHost("[2001:db8:85a3:8d3:1319:8a2e:370:7348]:5678")); + } + + @Test + public void testGetPort() { + assertEquals(8000, getPort("127.0.0.1:8000").intValue()); + assertEquals(8080, getPort("mydomain.com:8080").intValue()); + assertEquals(1234, getPort("[::1]:1234").intValue()); + assertEquals(5678, getPort("[2001:db8:85a3:8d3:1319:8a2e:370:7348]:5678").intValue()); + } + + @Test + public void testFormatAddress() { + assertEquals("127.0.0.1:8000", formatAddress("127.0.0.1", 8000)); + assertEquals("mydomain.com:8080", formatAddress("mydomain.com", 8080)); + assertEquals("[::1]:1234", formatAddress("::1", 1234)); + assertEquals("[2001:db8:85a3:8d3:1319:8a2e:370:7348]:5678", formatAddress("2001:db8:85a3:8d3:1319:8a2e:370:7348", 5678)); + } +} \ No newline at end of file diff --git a/clients/src/test/java/org/apache/kafka/test/MetricsBench.java b/clients/src/test/java/org/apache/kafka/test/MetricsBench.java index 7239b4a56e93f..9d98c11482554 100644 --- a/clients/src/test/java/org/apache/kafka/test/MetricsBench.java +++ b/clients/src/test/java/org/apache/kafka/test/MetricsBench.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.test; @@ -27,7 +23,6 @@ import org.apache.kafka.common.metrics.stats.Percentiles; import org.apache.kafka.common.metrics.stats.Percentiles.BucketSizing; - public class MetricsBench { public static void main(String[] args) { @@ -48,7 +43,7 @@ public static void main(String[] args) { } long start = System.nanoTime(); for (int i = 0; i < iters; i++) - child.record(i); + parent.record(i); double ellapsed = (System.nanoTime() - start) / (double) iters; System.out.println(String.format("%.2f ns per metric recording.", ellapsed)); } diff --git a/clients/src/test/java/org/apache/kafka/test/Microbenchmarks.java b/clients/src/test/java/org/apache/kafka/test/Microbenchmarks.java index 46cf86e67e5ee..b24d4de21bfea 100644 --- a/clients/src/test/java/org/apache/kafka/test/Microbenchmarks.java +++ b/clients/src/test/java/org/apache/kafka/test/Microbenchmarks.java @@ -1,18 +1,14 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. */ package org.apache.kafka.test; @@ -24,11 +20,11 @@ import java.util.Random; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantLock; import org.apache.kafka.common.utils.CopyOnWriteMap; import org.apache.kafka.common.utils.SystemTime; - public class Microbenchmarks { public static void main(String[] args) throws Exception { @@ -88,7 +84,7 @@ public void run() { counter++; } } - System.out.println("synchronized: " + ((System.nanoTime() - start) / iters)); + System.out.println("synchronized: " + ((time.nanoseconds() - start) / iters)); System.out.println(counter); done.set(true); } @@ -112,6 +108,43 @@ public void run() { t1.join(); t2.join(); + System.out.println("Testing locks"); + done.set(false); + final ReentrantLock lock2 = new ReentrantLock(); + Thread t3 = new Thread() { + public void run() { + time.sleep(1); + int counter = 0; + long start = time.nanoseconds(); + for (int i = 0; i < iters; i++) { + lock2.lock(); + counter++; + lock2.unlock(); + } + System.out.println("lock: " + ((time.nanoseconds() - start) / iters)); + System.out.println(counter); + done.set(true); + } + }; + + Thread t4 = new Thread() { + public void run() { + int counter = 0; + while (!done.get()) { + time.sleep(1); + lock2.lock(); + counter++; + lock2.unlock(); + } + System.out.println("Counter: " + counter); + } + }; + + t3.start(); + t4.start(); + t3.join(); + t4.join(); + Map values = new HashMap(); for (int i = 0; i < 100; i++) values.put(Integer.toString(i), i); diff --git a/clients/src/test/java/org/apache/kafka/test/TestUtils.java b/clients/src/test/java/org/apache/kafka/test/TestUtils.java index 36cfc0fda742e..76a17e8849bad 100644 --- a/clients/src/test/java/org/apache/kafka/test/TestUtils.java +++ b/clients/src/test/java/org/apache/kafka/test/TestUtils.java @@ -88,7 +88,7 @@ public static int choosePort() { /** * Generate an array of random bytes * - * @param numBytes The size of the array + * @param size The size of the array */ public static byte[] randomBytes(int size) { byte[] bytes = new byte[size]; diff --git a/system_test/migration_tool_testsuite/0.7/bin/zookeeper-server-start.sh b/clients/src/test/resources/log4j.properties old mode 100755 new mode 100644 similarity index 75% rename from system_test/migration_tool_testsuite/0.7/bin/zookeeper-server-start.sh rename to clients/src/test/resources/log4j.properties index 184a10be05f58..b1d5b7f2b4091 --- a/system_test/migration_tool_testsuite/0.7/bin/zookeeper-server-start.sh +++ b/clients/src/test/resources/log4j.properties @@ -1,23 +1,21 @@ -#!/bin/bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. 
You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +log4j.rootLogger=OFF, stdout -if [ $# -ne 1 ]; -then - echo "USAGE: $0 zookeeper.properties" - exit 1 -fi +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n -$(dirname $0)/kafka-run-class.sh org.apache.zookeeper.server.quorum.QuorumPeerMain $@ +log4j.logger.org.apache.kafka=ERROR diff --git a/config/consumer.properties b/config/consumer.properties index 7343cbc28cf8b..83847de30d10b 100644 --- a/config/consumer.properties +++ b/config/consumer.properties @@ -20,7 +20,7 @@ zookeeper.connect=127.0.0.1:2181 # timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 +zookeeper.connection.timeout.ms=6000 #consumer group id group.id=test-consumer-group diff --git a/config/log4j.properties b/config/log4j.properties index 1ab850772a965..c51ab8b6b20d2 100644 --- a/config/log4j.properties +++ b/config/log4j.properties @@ -13,8 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -kafka.logs.dir=logs - log4j.rootLogger=INFO, stdout log4j.appender.stdout=org.apache.log4j.ConsoleAppender @@ -41,7 +39,7 @@ log4j.appender.requestAppender.layout.ConversionPattern=[%d] %p %m (%c)%n log4j.appender.cleanerAppender=org.apache.log4j.DailyRollingFileAppender log4j.appender.cleanerAppender.DatePattern='.'yyyy-MM-dd-HH -log4j.appender.cleanerAppender.File=log-cleaner.log +log4j.appender.cleanerAppender.File=${kafka.logs.dir}/log-cleaner.log log4j.appender.cleanerAppender.layout=org.apache.log4j.PatternLayout log4j.appender.cleanerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n @@ -73,8 +71,6 @@ log4j.additivity.kafka.controller=false log4j.logger.kafka.log.LogCleaner=INFO, cleanerAppender log4j.additivity.kafka.log.LogCleaner=false -log4j.logger.kafka.log.Cleaner=INFO, cleanerAppender -log4j.additivity.kafka.log.Cleaner=false log4j.logger.state.change.logger=TRACE, stateChangeAppender log4j.additivity.state.change.logger=false diff --git a/config/producer.properties b/config/producer.properties index 52a76114f5d09..47ae3e241301e 100644 --- a/config/producer.properties +++ b/config/producer.properties @@ -26,8 +26,8 @@ metadata.broker.list=localhost:9092 # specifies whether the messages are sent asynchronously (async) or synchronously (sync) producer.type=sync -# specify the compression codec for all data generated: none , gzip, snappy. -# the old config values work as well: 0, 1, 2 for none, gzip, snappy, respectivally +# specify the compression codec for all data generated: none, gzip, snappy, lz4. 
+# the old config values work as well: 0, 1, 2, 3 for none, gzip, snappy, lz4, respectively compression.codec=none # message encoder diff --git a/config/server.properties b/config/server.properties index 2ffe0ebccf109..b0e4496a8ca73 100644 --- a/config/server.properties +++ b/config/server.properties @@ -37,16 +37,16 @@ port=9092 #advertised.port= # The number of threads handling network requests -num.network.threads=2 +num.network.threads=3 # The number of threads doing disk I/O -num.io.threads=2 +num.io.threads=8 # The send buffer (SO_SNDBUF) used by the socket server -socket.send.buffer.bytes=1048576 +socket.send.buffer.bytes=102400 # The receive buffer (SO_RCVBUF) used by the socket server -socket.receive.buffer.bytes=1048576 +socket.receive.buffer.bytes=65536 # The maximum size of a request that the socket server will accept (protection against OOM) socket.request.max.bytes=104857600 @@ -60,7 +60,11 @@ log.dirs=/tmp/kafka-logs # The default number of log partitions per topic. More partitions allow greater # parallelism for consumption, but this will also result in more files across # the brokers. -num.partitions=2 +num.partitions=1 + +# The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. +# This value is recommended to be increased for installations with data dirs located in RAID array. +num.recovery.threads.per.data.dir=1 ############################# Log Flush Policy ############################# @@ -94,11 +98,15 @@ log.retention.hours=168 #log.retention.bytes=1073741824 # The maximum size of a log segment file. When this size is reached a new log segment will be created. -log.segment.bytes=536870912 +log.segment.bytes=1073741824 # The interval at which log segments are checked to see if they can be deleted according # to the retention policies -log.retention.check.interval.ms=60000 +log.retention.check.interval.ms=300000 + +# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. +# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. +log.cleaner.enable=false ############################# Zookeeper ############################# @@ -110,7 +118,4 @@ log.retention.check.interval.ms=60000 zookeeper.connect=localhost:2181 # Timeout in ms for connecting to zookeeper -zookeeper.connection.timeout.ms=1000000 - - -log.cleanup.policy=delete +zookeeper.connection.timeout.ms=2000 diff --git a/config/test-log4j.properties b/config/test-log4j.properties index a3ae33f20e4b7..e0bbc134233c9 100644 --- a/config/test-log4j.properties +++ b/config/test-log4j.properties @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-log4j.rootLogger=INFO, stdout +log4j.rootLogger=INFO, stdout log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout @@ -45,8 +45,8 @@ log4j.appender.controllerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n # Turn on all our debugging info #log4j.logger.kafka.producer.async.DefaultEventHandler=DEBUG, kafkaAppender #log4j.logger.kafka.client.ClientUtils=DEBUG, kafkaAppender -log4j.logger.kafka.perf=DEBUG, kafkaAppender -log4j.logger.kafka.perf.ProducerPerformance$ProducerThread=DEBUG, kafkaAppender +log4j.logger.kafka.tools=DEBUG, kafkaAppender +log4j.logger.kafka.tools.ProducerPerformance$ProducerThread=DEBUG, kafkaAppender #log4j.logger.org.I0Itec.zkclient.ZkClient=DEBUG log4j.logger.kafka=INFO, kafkaAppender @@ -64,5 +64,3 @@ log4j.additivity.kafka.controller=false log4j.logger.state.change.logger=TRACE, stateChangeAppender log4j.additivity.state.change.logger=false - - diff --git a/config/tools-log4j.properties b/config/tools-log4j.properties index 7924049014983..52f07c96019b4 100644 --- a/config/tools-log4j.properties +++ b/config/tools-log4j.properties @@ -18,5 +18,3 @@ log4j.rootLogger=WARN, stdout log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - - diff --git a/contrib/LICENSE b/contrib/LICENSE new file mode 120000 index 0000000000000..ea5b60640b01f --- /dev/null +++ b/contrib/LICENSE @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/contrib/NOTICE b/contrib/NOTICE new file mode 120000 index 0000000000000..7e1b82f6e6a12 --- /dev/null +++ b/contrib/NOTICE @@ -0,0 +1 @@ +../NOTICE \ No newline at end of file diff --git a/contrib/hadoop-consumer/LICENSE b/contrib/hadoop-consumer/LICENSE deleted file mode 100644 index 6b0b1270ff0ca..0000000000000 --- a/contrib/hadoop-consumer/LICENSE +++ /dev/null @@ -1,203 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - diff --git a/contrib/hadoop-consumer/build.sbt b/contrib/hadoop-consumer/build.sbt deleted file mode 100644 index 02e95eb8ca2c7..0000000000000 --- a/contrib/hadoop-consumer/build.sbt +++ /dev/null @@ -1 +0,0 @@ -crossPaths := false diff --git a/contrib/hadoop-consumer/lib/piggybank.jar b/contrib/hadoop-consumer/lib/piggybank.jar deleted file mode 100644 index cbd46e065aab0..0000000000000 Binary files a/contrib/hadoop-consumer/lib/piggybank.jar and /dev/null differ diff --git a/contrib/hadoop-consumer/src/main/java/kafka/etl/Props.java b/contrib/hadoop-consumer/src/main/java/kafka/etl/Props.java index 3514ec77bfd1f..71eb80f313fa2 100644 --- a/contrib/hadoop-consumer/src/main/java/kafka/etl/Props.java +++ b/contrib/hadoop-consumer/src/main/java/kafka/etl/Props.java @@ -115,11 +115,11 @@ public Props(Properties... properties) { } /** - * build props from a list of strings and interprate them as + * build props from a list of strings and interpret them as * key, value, key, value,.... 
* * @param args - * @return + * @return props */ @SuppressWarnings("unchecked") public static Props of(String... args) { @@ -148,7 +148,7 @@ public void put(Properties properties) { /** * get property of "key" and split the value by " ," * @param key - * @return + * @return list of values */ public List getStringList(String key) { return getStringList(key, "\\s*,\\s*"); @@ -158,7 +158,7 @@ public List getStringList(String key) { * get property of "key" and split the value by "sep" * @param key * @param sep - * @return + * @return string list of values */ public List getStringList(String key, String sep) { String val = super.getProperty(key); @@ -176,7 +176,7 @@ public List getStringList(String key, String sep) { * get string list with default value. default delimiter is "," * @param key * @param defaultValue - * @return + * @return string list of values */ public List getStringList(String key, List defaultValue) { if (containsKey(key)) @@ -189,7 +189,7 @@ public List getStringList(String key, List defaultValue) { * get string list with default value * @param key * @param defaultValue - * @return + * @return string list of values */ public List getStringList(String key, List defaultValue, String sep) { @@ -251,10 +251,10 @@ else throw new UndefinedPropertyException ("Property " + key + } /** - * get boolean value + * get boolean value with default value * @param key * @param defaultValue - * @return + * @return boolean value * @throws Exception if value is not of type boolean or string */ public Boolean getBoolean(String key, Boolean defaultValue) @@ -265,8 +265,7 @@ public Boolean getBoolean(String key, Boolean defaultValue) /** * get boolean value * @param key - * @param defaultValue - * @return + * @return boolean value * @throws Exception if value is not of type boolean or string or * if value doesn't exist */ @@ -275,10 +274,10 @@ public Boolean getBoolean(String key) throws Exception { } /** - * get long value - * @param key + * get long value with default value + * @param name * @param defaultValue - * @return + * @return long value * @throws Exception if value is not of type long or string */ public Long getLong(String name, Long defaultValue) @@ -288,9 +287,8 @@ public Long getLong(String name, Long defaultValue) /** * get long value - * @param key - * @param defaultValue - * @return + * @param name + * @return long value * @throws Exception if value is not of type long or string or * if value doesn't exist */ @@ -299,10 +297,10 @@ public Long getLong(String name) throws Exception { } /** - * get integer value - * @param key + * get integer value with default value + * @param name * @param defaultValue - * @return + * @return integer value * @throws Exception if value is not of type integer or string */ public Integer getInt(String name, Integer defaultValue) @@ -312,9 +310,8 @@ public Integer getInt(String name, Integer defaultValue) /** * get integer value - * @param key - * @param defaultValue - * @return + * @param name + * @return integer value * @throws Exception if value is not of type integer or string or * if value doesn't exist */ @@ -323,10 +320,10 @@ public Integer getInt(String name) throws Exception { } /** - * get double value - * @param key + * get double value with default value + * @param name * @param defaultValue - * @return + * @return double value * @throws Exception if value is not of type double or string */ public Double getDouble(String name, double defaultValue) @@ -336,9 +333,8 @@ public Double getDouble(String name, double defaultValue) /** * get double 
value - * @param key - * @param defaultValue - * @return + * @param name + * @return double value * @throws Exception if value is not of type double or string or * if value doesn't exist */ @@ -347,10 +343,10 @@ public double getDouble(String name) throws Exception { } /** - * get URI value - * @param key + * get URI value with default value + * @param name * @param defaultValue - * @return + * @return URI value * @throws Exception if value is not of type URI or string */ public URI getUri(String name, URI defaultValue) throws Exception { @@ -359,9 +355,9 @@ public URI getUri(String name, URI defaultValue) throws Exception { /** * get URI value - * @param key + * @param name * @param defaultValue - * @return + * @return URI value * @throws Exception if value is not of type URI or string */ public URI getUri(String name, String defaultValue) @@ -372,9 +368,8 @@ public URI getUri(String name, String defaultValue) /** * get URI value - * @param key - * @param defaultValue - * @return + * @param name + * @return URI value * @throws Exception if value is not of type URI or string or * if value doesn't exist */ @@ -385,7 +380,7 @@ public URI getUri(String name) throws Exception { /** * compare two props * @param p - * @return + * @return true or false */ public boolean equalsProps(Props p) { if (p == null) { @@ -432,7 +427,7 @@ public void store(OutputStream out) throws IOException { /** * get all property names - * @return + * @return set of property names */ public Set getKeySet() { return super.stringPropertyNames(); @@ -453,7 +448,7 @@ public void logProperties(String comment) { /** * clone a Props * @param p - * @return + * @return props */ public static Props clone(Props p) { return new Props(p); diff --git a/contrib/hadoop-consumer/src/main/java/kafka/etl/impl/DataGenerator.java b/contrib/hadoop-consumer/src/main/java/kafka/etl/impl/DataGenerator.java index f3fb3fd99869e..d27a511fcdd73 100644 --- a/contrib/hadoop-consumer/src/main/java/kafka/etl/impl/DataGenerator.java +++ b/contrib/hadoop-consumer/src/main/java/kafka/etl/impl/DataGenerator.java @@ -27,7 +27,6 @@ import kafka.etl.KafkaETLRequest; import kafka.etl.Props; import kafka.javaapi.producer.Producer; -import kafka.message.Message; import kafka.producer.ProducerConfig; import kafka.producer.KeyedMessage; import org.apache.hadoop.fs.FileSystem; @@ -36,6 +35,8 @@ import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.mapred.JobConf; +import static org.apache.kafka.common.utils.Utils.formatAddress; + /** * Use this class to produce test events to Kafka server. Each event contains a * random timestamp in text format. 
@@ -70,7 +71,7 @@ public DataGenerator(String id, Props props) throws Exception { System.out.println("server uri:" + _uri.toString()); Properties producerProps = new Properties(); - producerProps.put("metadata.broker.list", String.format("%s:%d", _uri.getHost(), _uri.getPort())); + producerProps.put("metadata.broker.list", formatAddress(_uri.getHost(), _uri.getPort())); producerProps.put("send.buffer.bytes", String.valueOf(TCP_BUFFER_SIZE)); producerProps.put("connect.timeout.ms", String.valueOf(CONNECT_TIMEOUT)); producerProps.put("reconnect.interval", String.valueOf(RECONNECT_INTERVAL)); @@ -108,7 +109,7 @@ protected void generateOffsets() throws Exception { if (fs.exists(outPath)) fs.delete(outPath); KafkaETLRequest request = - new KafkaETLRequest(_topic, "tcp://" + _uri.getHost() + ":" + _uri.getPort(), 0); + new KafkaETLRequest(_topic, "tcp://" + formatAddress(_uri.getHost(), _uri.getPort()), 0); System.out.println("Dump " + request.toString() + " to " + outPath.toUri().toString()); byte[] bytes = request.toString().getBytes("UTF-8"); diff --git a/contrib/hadoop-producer/LICENSE b/contrib/hadoop-producer/LICENSE deleted file mode 100644 index 6b0b1270ff0ca..0000000000000 --- a/contrib/hadoop-producer/LICENSE +++ /dev/null @@ -1,203 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- diff --git a/contrib/hadoop-producer/build.sbt b/contrib/hadoop-producer/build.sbt deleted file mode 100644 index 02e95eb8ca2c7..0000000000000 --- a/contrib/hadoop-producer/build.sbt +++ /dev/null @@ -1 +0,0 @@ -crossPaths := false diff --git a/contrib/hadoop-producer/lib/piggybank.jar b/contrib/hadoop-producer/lib/piggybank.jar deleted file mode 100644 index cbd46e065aab0..0000000000000 Binary files a/contrib/hadoop-producer/lib/piggybank.jar and /dev/null differ diff --git a/core/build.sbt b/core/build.sbt deleted file mode 100644 index 3eff64ef3f15a..0000000000000 --- a/core/build.sbt +++ /dev/null @@ -1,32 +0,0 @@ -import sbt._ -import Keys._ -import AssemblyKeys._ - -name := "kafka" - -resolvers ++= Seq( - "SonaType ScalaTest repo" at "https://oss.sonatype.org/content/groups/public/org/scalatest/" -) - -libraryDependencies <+= scalaVersion("org.scala-lang" % "scala-compiler" % _ ) - -libraryDependencies ++= Seq( - "org.apache.zookeeper" % "zookeeper" % "3.3.4", - "com.101tec" % "zkclient" % "0.3", - "org.xerial.snappy" % "snappy-java" % "1.0.5", - "com.yammer.metrics" % "metrics-core" % "2.2.0", - "com.yammer.metrics" % "metrics-annotation" % "2.2.0", - "org.easymock" % "easymock" % "3.0" % "test", - "junit" % "junit" % "4.1" % "test" -) - -libraryDependencies <<= (scalaVersion, libraryDependencies) { (sv, deps) => - deps :+ (sv match { - case "2.8.0" => "org.scalatest" % "scalatest" % "1.2" % "test" - case v if v.startsWith("2.10") => "org.scalatest" %% "scalatest" % "1.9.1" % "test" - case _ => "org.scalatest" %% "scalatest" % "1.8" % "test" - }) -} - -assemblySettings - diff --git a/core/src/main/scala/kafka/Kafka.scala b/core/src/main/scala/kafka/Kafka.scala index 988014a51c332..77a49e12af6f8 100644 --- a/core/src/main/scala/kafka/Kafka.scala +++ b/core/src/main/scala/kafka/Kafka.scala @@ -17,34 +17,54 @@ package kafka - +import scala.collection.JavaConversions._ +import joptsimple.OptionParser import metrics.KafkaMetricsReporter import server.{KafkaConfig, KafkaServerStartable, KafkaServer} -import utils.{Utils, Logging} +import kafka.utils.{CommandLineUtils, Utils, Logging} object Kafka extends Logging { - def main(args: Array[String]): Unit = { - if (args.length != 1) { - println("USAGE: java [options] %s server.properties".format(classOf[KafkaServer].getSimpleName())) - System.exit(1) + def getKafkaConfigFromArgs(args: Array[String]): KafkaConfig = { + val optionParser = new OptionParser + val overrideOpt = optionParser.accepts("override", "Optional property that should override values set in server.properties file") + .withRequiredArg() + .ofType(classOf[String]) + + if (args.length == 0) { + CommandLineUtils.printUsageAndDie(optionParser, "USAGE: java [options] %s server.properties [--override property=value]*".format(classOf[KafkaServer].getSimpleName())) + } + + val props = Utils.loadProps(args(0)) + + if(args.length > 1) { + val options = optionParser.parse(args.slice(1, args.length): _*) + + if(options.nonOptionArguments().size() > 0) { + CommandLineUtils.printUsageAndDie(optionParser, "Found non argument parameters: " + options.nonOptionArguments().toArray.mkString(",")) + } + + props.putAll(CommandLineUtils.parseKeyValueArgs(options.valuesOf(overrideOpt))) } - + + new KafkaConfig(props) + } + + def main(args: Array[String]): Unit = { try { - val props = Utils.loadProps(args(0)) - val serverConfig = new KafkaConfig(props) + val serverConfig = getKafkaConfigFromArgs(args) KafkaMetricsReporter.startReporters(serverConfig.props) - val kafkaServerStartble = new 
KafkaServerStartable(serverConfig) + val kafkaServerStartable = new KafkaServerStartable(serverConfig) // attach shutdown handler to catch control-c Runtime.getRuntime().addShutdownHook(new Thread() { override def run() = { - kafkaServerStartble.shutdown + kafkaServerStartable.shutdown } }) - kafkaServerStartble.startup - kafkaServerStartble.awaitShutdown + kafkaServerStartable.startup + kafkaServerStartable.awaitShutdown } catch { case e: Throwable => fatal(e) diff --git a/core/src/main/scala/kafka/admin/AdminUtils.scala b/core/src/main/scala/kafka/admin/AdminUtils.scala index 36ddeb44490e8..28b12c7b89a56 100644 --- a/core/src/main/scala/kafka/admin/AdminUtils.scala +++ b/core/src/main/scala/kafka/admin/AdminUtils.scala @@ -17,25 +17,30 @@ package kafka.admin -import java.util.Random -import java.util.Properties -import kafka.api.{TopicMetadata, PartitionMetadata} +import kafka.common._ import kafka.cluster.Broker import kafka.log.LogConfig import kafka.utils.{Logging, ZkUtils, Json} -import org.I0Itec.zkclient.ZkClient -import org.I0Itec.zkclient.exception.ZkNodeExistsException +import kafka.api.{TopicMetadata, PartitionMetadata} + +import java.util.Random +import java.util.Properties +import scala.Some +import scala.Predef._ import scala.collection._ import mutable.ListBuffer import scala.collection.mutable -import kafka.common._ -import scala.Predef._ import collection.Map -import scala.Some import collection.Set +import org.I0Itec.zkclient.ZkClient +import org.I0Itec.zkclient.exception.ZkNodeExistsException + object AdminUtils extends Logging { val rand = new Random + + val AdminClientId = "__admin_client" + val TopicConfigChangeZnodePrefix = "config_change_" /** @@ -87,7 +92,23 @@ object AdminUtils extends Logging { ret.toMap } - def addPartitions(zkClient: ZkClient, topic: String, numPartitions: Int = 1, replicaAssignmentStr: String = "") { + + /** + * Add partitions to existing topic with optional replica assignment + * + * @param zkClient Zookeeper client + * @param topic Topic for adding partitions to + * @param numPartitions Number of partitions to be set + * @param replicaAssignmentStr Manual replica assignment + * @param checkBrokerAvailable Ignore checking if assigned replica broker is available. 
Only used for testing + * @param config Pre-existing properties that should be preserved + */ + def addPartitions(zkClient: ZkClient, + topic: String, + numPartitions: Int = 1, + replicaAssignmentStr: String = "", + checkBrokerAvailable: Boolean = true, + config: Properties = new Properties) { val existingPartitionsReplicaList = ZkUtils.getReplicaAssignmentForTopics(zkClient, List(topic)) if (existingPartitionsReplicaList.size == 0) throw new AdminOperationException("The topic %s does not exist".format(topic)) @@ -102,7 +123,7 @@ object AdminUtils extends Logging { val newPartitionReplicaList = if (replicaAssignmentStr == null || replicaAssignmentStr == "") AdminUtils.assignReplicasToBrokers(brokerList, partitionsToAdd, existingReplicaList.size, existingReplicaList.head, existingPartitionsReplicaList.size) else - getManualReplicaAssignment(replicaAssignmentStr, brokerList.toSet, existingPartitionsReplicaList.size) + getManualReplicaAssignment(replicaAssignmentStr, brokerList.toSet, existingPartitionsReplicaList.size, checkBrokerAvailable) // check if manual assignment has the right replication factor val unmatchedRepFactorList = newPartitionReplicaList.values.filter(p => (p.size != existingReplicaList.size)) @@ -114,10 +135,10 @@ object AdminUtils extends Logging { val partitionReplicaList = existingPartitionsReplicaList.map(p => p._1.partition -> p._2) // add the new list partitionReplicaList ++= newPartitionReplicaList - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, partitionReplicaList, update = true) + AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, partitionReplicaList, config, true) } - def getManualReplicaAssignment(replicaAssignmentList: String, availableBrokerList: Set[Int], startPartitionId: Int): Map[Int, List[Int]] = { + def getManualReplicaAssignment(replicaAssignmentList: String, availableBrokerList: Set[Int], startPartitionId: Int, checkBrokerAvailable: Boolean = true): Map[Int, List[Int]] = { var partitionList = replicaAssignmentList.split(",") val ret = new mutable.HashMap[Int, List[Int]]() var partitionId = startPartitionId @@ -128,7 +149,7 @@ object AdminUtils extends Logging { throw new AdminOperationException("replication factor must be larger than 0") if (brokerList.size != brokerList.toSet.size) throw new AdminOperationException("duplicate brokers in replica assignment: " + brokerList) - if (!brokerList.toSet.subsetOf(availableBrokerList)) + if (checkBrokerAvailable && !brokerList.toSet.subsetOf(availableBrokerList)) throw new AdminOperationException("some specified brokers not available. specified brokers: " + brokerList.toString + "available broker:" + availableBrokerList.toString) ret.put(partitionId, brokerList.toList) diff --git a/core/src/main/scala/kafka/admin/DeleteTopicCommand.scala b/core/src/main/scala/kafka/admin/DeleteTopicCommand.scala deleted file mode 100644 index 804b331a7e441..0000000000000 --- a/core/src/main/scala/kafka/admin/DeleteTopicCommand.scala +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package kafka.admin - -import joptsimple.OptionParser -import org.I0Itec.zkclient.ZkClient -import kafka.utils.{Utils, ZKStringSerializer, ZkUtils} - -object DeleteTopicCommand { - - def main(args: Array[String]): Unit = { - val parser = new OptionParser - val topicOpt = parser.accepts("topic", "REQUIRED: The topic to be deleted.") - .withRequiredArg - .describedAs("topic") - .ofType(classOf[String]) - val zkConnectOpt = parser.accepts("zookeeper", "REQUIRED: The connection string for the zookeeper connection in the form host:port. " + - "Multiple URLS can be given to allow fail-over.") - .withRequiredArg - .describedAs("urls") - .ofType(classOf[String]) - - val options = parser.parse(args : _*) - - for(arg <- List(topicOpt, zkConnectOpt)) { - if(!options.has(arg)) { - System.err.println("Missing required argument \"" + arg + "\"") - parser.printHelpOn(System.err) - System.exit(1) - } - } - - val topic = options.valueOf(topicOpt) - val zkConnect = options.valueOf(zkConnectOpt) - var zkClient: ZkClient = null - try { - zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer) - zkClient.deleteRecursive(ZkUtils.getTopicPath(topic)) - println("deletion succeeded!") - } - catch { - case e: Throwable => - println("delection failed because of " + e.getMessage) - println(Utils.stackTrace(e)) - } - finally { - if (zkClient != null) - zkClient.close() - } - } -} \ No newline at end of file diff --git a/core/src/main/scala/kafka/admin/PreferredReplicaLeaderElectionCommand.scala b/core/src/main/scala/kafka/admin/PreferredReplicaLeaderElectionCommand.scala index 9b3c6aeaf77db..79b5e0aeb4af7 100644 --- a/core/src/main/scala/kafka/admin/PreferredReplicaLeaderElectionCommand.scala +++ b/core/src/main/scala/kafka/admin/PreferredReplicaLeaderElectionCommand.scala @@ -40,6 +40,10 @@ object PreferredReplicaLeaderElectionCommand extends Logging { .withRequiredArg .describedAs("urls") .ofType(classOf[String]) + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "This tool causes leadership for each partition to be transferred back to the 'preferred replica'," + + " it can be used to balance leadership among the servers.") val options = parser.parse(args : _*) @@ -74,12 +78,17 @@ object PreferredReplicaLeaderElectionCommand extends Logging { case Some(m) => m.asInstanceOf[Map[String, Any]].get("partitions") match { case Some(partitionsList) => - val partitions = partitionsList.asInstanceOf[List[Map[String, Any]]] - partitions.map { p => + val partitionsRaw = partitionsList.asInstanceOf[List[Map[String, Any]]] + val partitions = partitionsRaw.map { p => val topic = p.get("topic").get.asInstanceOf[String] val partition = p.get("partition").get.asInstanceOf[Int] TopicAndPartition(topic, partition) - }.toSet + } + val duplicatePartitions = Utils.duplicates(partitions) + val partitionsSet = partitions.toSet + if (duplicatePartitions.nonEmpty) + throw new AdminOperationException("Preferred replica election data contains duplicate partitions: %s".format(duplicatePartitions.mkString(","))) + partitionsSet case None => throw new AdminOperationException("Preferred replica 
election data is empty") } case None => throw new AdminOperationException("Preferred replica election data is empty") diff --git a/core/src/main/scala/kafka/admin/ReassignPartitionsCommand.scala b/core/src/main/scala/kafka/admin/ReassignPartitionsCommand.scala index 2637586af99cf..979992b68af37 100644 --- a/core/src/main/scala/kafka/admin/ReassignPartitionsCommand.scala +++ b/core/src/main/scala/kafka/admin/ReassignPartitionsCommand.scala @@ -31,10 +31,8 @@ object ReassignPartitionsCommand extends Logging { // should have exactly one action val actions = Seq(opts.generateOpt, opts.executeOpt, opts.verifyOpt).count(opts.options.has _) - if(actions != 1) { - opts.parser.printHelpOn(System.err) - Utils.croak("Command must include exactly one action: --generate, --execute or --verify") - } + if(actions != 1) + CommandLineUtils.printUsageAndDie(opts.parser, "Command must include exactly one action: --generate, --execute or --verify") CommandLineUtils.checkRequiredArgs(opts.parser, opts.options, opts.zkConnectOpt) @@ -58,10 +56,8 @@ object ReassignPartitionsCommand extends Logging { } def verifyAssignment(zkClient: ZkClient, opts: ReassignPartitionsCommandOptions) { - if(!opts.options.has(opts.reassignmentJsonFileOpt)) { - opts.parser.printHelpOn(System.err) - Utils.croak("If --verify option is used, command must include --reassignment-json-file that was used during the --execute option") - } + if(!opts.options.has(opts.reassignmentJsonFileOpt)) + CommandLineUtils.printUsageAndDie(opts.parser, "If --verify option is used, command must include --reassignment-json-file that was used during the --execute option") val jsonFile = opts.options.valueOf(opts.reassignmentJsonFileOpt) val jsonString = Utils.readFileAsString(jsonFile) val partitionsToBeReassigned = ZkUtils.parsePartitionReassignmentData(jsonString) @@ -81,14 +77,18 @@ object ReassignPartitionsCommand extends Logging { } def generateAssignment(zkClient: ZkClient, opts: ReassignPartitionsCommandOptions) { - if(!(opts.options.has(opts.topicsToMoveJsonFileOpt) && opts.options.has(opts.brokerListOpt))) { - opts.parser.printHelpOn(System.err) - Utils.croak("If --generate option is used, command must include both --topics-to-move-json-file and --broker-list options") - } + if(!(opts.options.has(opts.topicsToMoveJsonFileOpt) && opts.options.has(opts.brokerListOpt))) + CommandLineUtils.printUsageAndDie(opts.parser, "If --generate option is used, command must include both --topics-to-move-json-file and --broker-list options") val topicsToMoveJsonFile = opts.options.valueOf(opts.topicsToMoveJsonFileOpt) val brokerListToReassign = opts.options.valueOf(opts.brokerListOpt).split(',').map(_.toInt) + val duplicateReassignments = Utils.duplicates(brokerListToReassign) + if (duplicateReassignments.nonEmpty) + throw new AdminCommandFailedException("Broker list contains duplicate entries: %s".format(duplicateReassignments.mkString(","))) val topicsToMoveJsonString = Utils.readFileAsString(topicsToMoveJsonFile) val topicsToReassign = ZkUtils.parseTopicsData(topicsToMoveJsonString) + val duplicateTopicsToReassign = Utils.duplicates(topicsToReassign) + if (duplicateTopicsToReassign.nonEmpty) + throw new AdminCommandFailedException("List of topics to reassign contains duplicate entries: %s".format(duplicateTopicsToReassign.mkString(","))) val topicPartitionsToReassign = ZkUtils.getReplicaAssignmentForTopics(zkClient, topicsToReassign) var partitionsToBeReassigned : Map[TopicAndPartition, Seq[Int]] = new mutable.HashMap[TopicAndPartition, List[Int]]() @@ -105,24 
+105,33 @@ object ReassignPartitionsCommand extends Logging { } def executeAssignment(zkClient: ZkClient, opts: ReassignPartitionsCommandOptions) { - if(!opts.options.has(opts.reassignmentJsonFileOpt)) { - opts.parser.printHelpOn(System.err) - Utils.croak("If --execute option is used, command must include --reassignment-json-file that was output " + - "during the --generate option") - } + if(!opts.options.has(opts.reassignmentJsonFileOpt)) + CommandLineUtils.printUsageAndDie(opts.parser, "If --execute option is used, command must include --reassignment-json-file that was output " + "during the --generate option") val reassignmentJsonFile = opts.options.valueOf(opts.reassignmentJsonFileOpt) val reassignmentJsonString = Utils.readFileAsString(reassignmentJsonFile) - val partitionsToBeReassigned = ZkUtils.parsePartitionReassignmentData(reassignmentJsonString) + val partitionsToBeReassigned = ZkUtils.parsePartitionReassignmentDataWithoutDedup(reassignmentJsonString) if (partitionsToBeReassigned.isEmpty) throw new AdminCommandFailedException("Partition reassignment data file %s is empty".format(reassignmentJsonFile)) - val reassignPartitionsCommand = new ReassignPartitionsCommand(zkClient, partitionsToBeReassigned) + val duplicateReassignedPartitions = Utils.duplicates(partitionsToBeReassigned.map{ case(tp,replicas) => tp}) + if (duplicateReassignedPartitions.nonEmpty) + throw new AdminCommandFailedException("Partition reassignment contains duplicate topic partitions: %s".format(duplicateReassignedPartitions.mkString(","))) + val duplicateEntries= partitionsToBeReassigned + .map{ case(tp,replicas) => (tp, Utils.duplicates(replicas))} + .filter{ case (tp,duplicatedReplicas) => duplicatedReplicas.nonEmpty } + if (duplicateEntries.nonEmpty) { + val duplicatesMsg = duplicateEntries + .map{ case (tp,duplicateReplicas) => "%s contains multiple entries for %s".format(tp, duplicateReplicas.mkString(",")) } + .mkString(". 
") + throw new AdminCommandFailedException("Partition replica lists may not contain duplicate entries: %s".format(duplicatesMsg)) + } + val reassignPartitionsCommand = new ReassignPartitionsCommand(zkClient, partitionsToBeReassigned.toMap) // before starting assignment, output the current replica assignment to facilitate rollback val currentPartitionReplicaAssignment = ZkUtils.getReplicaAssignmentForTopics(zkClient, partitionsToBeReassigned.map(_._1.topic).toSeq) println("Current partition replica assignment\n\n%s\n\nSave this to use as the --reassignment-json-file option during rollback" .format(ZkUtils.getPartitionReassignmentZkData(currentPartitionReplicaAssignment))) // start the reassignment if(reassignPartitionsCommand.reassignPartitions()) - println("Successfully started reassignment of partitions %s".format(ZkUtils.getPartitionReassignmentZkData(partitionsToBeReassigned))) + println("Successfully started reassignment of partitions %s".format(ZkUtils.getPartitionReassignmentZkData(partitionsToBeReassigned.toMap))) else println("Failed to reassign partitions %s".format(partitionsToBeReassigned)) } @@ -185,6 +194,9 @@ object ReassignPartitionsCommand extends Logging { .withRequiredArg .describedAs("brokerlist") .ofType(classOf[String]) + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "This command moves topic partitions between replicas.") val options = parser.parse(args : _*) } diff --git a/core/src/main/scala/kafka/admin/ShutdownBroker.scala b/core/src/main/scala/kafka/admin/ShutdownBroker.scala deleted file mode 100644 index 2dd47e7362f42..0000000000000 --- a/core/src/main/scala/kafka/admin/ShutdownBroker.scala +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package kafka.admin - - -import joptsimple.OptionParser -import kafka.utils._ -import org.I0Itec.zkclient.ZkClient -import javax.management.remote.{JMXServiceURL, JMXConnectorFactory} -import javax.management.ObjectName -import kafka.controller.KafkaController -import scala.Some -import kafka.common.{TopicAndPartition, BrokerNotAvailableException} - - -object ShutdownBroker extends Logging { - - private case class ShutdownParams(zkConnect: String, brokerId: java.lang.Integer) - - private def invokeShutdown(params: ShutdownParams): Boolean = { - var zkClient: ZkClient = null - try { - zkClient = new ZkClient(params.zkConnect, 30000, 30000, ZKStringSerializer) - val controllerBrokerId = ZkUtils.getController(zkClient) - ZkUtils.readDataMaybeNull(zkClient, ZkUtils.BrokerIdsPath + "/" + controllerBrokerId)._1 match { - case Some(controllerInfo) => - var controllerHost: String = null - var controllerJmxPort: Int = -1 - try { - Json.parseFull(controllerInfo) match { - case Some(m) => - val brokerInfo = m.asInstanceOf[Map[String, Any]] - controllerHost = brokerInfo.get("host").get.toString - controllerJmxPort = brokerInfo.get("jmx_port").get.asInstanceOf[Int] - case None => - throw new BrokerNotAvailableException("Broker id %d does not exist".format(controllerBrokerId)) - } - } - val jmxUrl = new JMXServiceURL("service:jmx:rmi:///jndi/rmi://%s:%d/jmxrmi".format(controllerHost, controllerJmxPort)) - info("Connecting to jmx url " + jmxUrl) - val jmxc = JMXConnectorFactory.connect(jmxUrl, null) - val mbsc = jmxc.getMBeanServerConnection - val leaderPartitionsRemaining = mbsc.invoke(new ObjectName(KafkaController.MBeanName), - "shutdownBroker", - Array(params.brokerId), - Array(classOf[Int].getName)).asInstanceOf[Set[TopicAndPartition]] - val shutdownComplete = (leaderPartitionsRemaining.size == 0) - info("Shutdown status: " + - (if (shutdownComplete) "complete" else "incomplete (broker still leads %d partitions)".format(leaderPartitionsRemaining))) - shutdownComplete - case None => - throw new BrokerNotAvailableException("Broker id %d does not exist".format(controllerBrokerId)) - } - } catch { - case t: Throwable => - error("Operation failed due to controller failure", t) - false - } finally { - if (zkClient != null) - zkClient.close() - } - } - - def main(args: Array[String]) { - val parser = new OptionParser - val brokerOpt = parser.accepts("broker", "REQUIRED: The broker to shutdown.") - .withRequiredArg - .describedAs("Broker Id") - .ofType(classOf[java.lang.Integer]) - val zkConnectOpt = parser.accepts("zookeeper", "REQUIRED: The connection string for the zookeeper connection in the form host:port. 
" + - "Multiple URLS can be given to allow fail-over.") - .withRequiredArg - .describedAs("urls") - .ofType(classOf[String]) - val numRetriesOpt = parser.accepts("num.retries", "Number of attempts to retry if shutdown does not complete.") - .withRequiredArg - .describedAs("number of retries") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(0) - val retryIntervalOpt = parser.accepts("retry.interval.ms", "Retry interval if retries requested.") - .withRequiredArg - .describedAs("retry interval in ms (> 1000)") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(1000) - - val options = parser.parse(args : _*) - CommandLineUtils.checkRequiredArgs(parser, options, brokerOpt, zkConnectOpt) - - val retryIntervalMs = options.valueOf(retryIntervalOpt).intValue.max(1000) - val numRetries = options.valueOf(numRetriesOpt).intValue - - val shutdownParams = ShutdownParams(options.valueOf(zkConnectOpt), options.valueOf(brokerOpt)) - - if (!invokeShutdown(shutdownParams)) { - (1 to numRetries).takeWhile(attempt => { - info("Retry " + attempt) - try { - Thread.sleep(retryIntervalMs) - } - catch { - case ie: InterruptedException => // ignore - } - !invokeShutdown(shutdownParams) - }) - } - } - -} - diff --git a/core/src/main/scala/kafka/admin/TopicCommand.scala b/core/src/main/scala/kafka/admin/TopicCommand.scala index fc8d6861ccab5..285c0333ff435 100644 --- a/core/src/main/scala/kafka/admin/TopicCommand.scala +++ b/core/src/main/scala/kafka/admin/TopicCommand.scala @@ -19,13 +19,18 @@ package kafka.admin import joptsimple._ import java.util.Properties +import kafka.common.AdminCommandFailedException import kafka.utils._ import org.I0Itec.zkclient.ZkClient +import org.I0Itec.zkclient.exception.ZkNodeExistsException import scala.collection._ import scala.collection.JavaConversions._ import kafka.cluster.Broker import kafka.log.LogConfig import kafka.consumer.Whitelist +import kafka.server.OffsetManager +import org.apache.kafka.common.utils.Utils.formatAddress + object TopicCommand { @@ -33,13 +38,13 @@ object TopicCommand { val opts = new TopicCommandOptions(args) + if(args.length == 0) + CommandLineUtils.printUsageAndDie(opts.parser, "Create, delete, describe, or change a topic.") + // should have exactly one action - val actions = Seq(opts.createOpt, opts.deleteOpt, opts.listOpt, opts.alterOpt, opts.describeOpt).count(opts.options.has _) - if(actions != 1) { - System.err.println("Command must include exactly one action: --list, --describe, --create, --delete, or --alter") - opts.parser.printHelpOn(System.err) - System.exit(1) - } + val actions = Seq(opts.createOpt, opts.listOpt, opts.alterOpt, opts.describeOpt, opts.deleteOpt).count(opts.options.has _) + if(actions != 1) + CommandLineUtils.printUsageAndDie(opts.parser, "Command must include exactly one action: --list, --describe, --create, --alter or --delete") opts.checkArgs() @@ -50,14 +55,14 @@ object TopicCommand { createTopic(zkClient, opts) else if(opts.options.has(opts.alterOpt)) alterTopic(zkClient, opts) - else if(opts.options.has(opts.deleteOpt)) - deleteTopic(zkClient, opts) else if(opts.options.has(opts.listOpt)) listTopics(zkClient, opts) else if(opts.options.has(opts.describeOpt)) describeTopic(zkClient, opts) + else if(opts.options.has(opts.deleteOpt)) + deleteTopic(zkClient, opts) } catch { - case e => + case e: Throwable => println("Error while executing topic command " + e.getMessage) println(Utils.stackTrace(e)) } finally { @@ -70,7 +75,7 @@ object TopicCommand { if (opts.options.has(opts.topicOpt)) { val topicsSpec = 
opts.options.valueOf(opts.topicOpt) val topicsFilter = new Whitelist(topicsSpec) - allTopics.filter(topicsFilter.isTopicAllowed) + allTopics.filter(topicsFilter.isTopicAllowed(_, excludeInternalTopics = false)) } else allTopics } @@ -92,42 +97,64 @@ object TopicCommand { def alterTopic(zkClient: ZkClient, opts: TopicCommandOptions) { val topics = getTopics(zkClient, opts) + if (topics.length == 0) { + println("Topic %s does not exist".format(opts.options.valueOf(opts.topicOpt))) + } topics.foreach { topic => + val configs = AdminUtils.fetchTopicConfig(zkClient, topic) if(opts.options.has(opts.configOpt) || opts.options.has(opts.deleteConfigOpt)) { val configsToBeAdded = parseTopicConfigsToBeAdded(opts) val configsToBeDeleted = parseTopicConfigsToBeDeleted(opts) // compile the final set of configs - val configs = AdminUtils.fetchTopicConfig(zkClient, topic) configs.putAll(configsToBeAdded) configsToBeDeleted.foreach(config => configs.remove(config)) AdminUtils.changeTopicConfig(zkClient, topic, configs) println("Updated config for topic \"%s\".".format(topic)) } if(opts.options.has(opts.partitionsOpt)) { + if (topic == OffsetManager.OffsetsTopicName) { + throw new IllegalArgumentException("The number of partitions for the offsets topic cannot be changed.") + } println("WARNING: If partitions are increased for a topic that has a key, the partition " + "logic or ordering of the messages will be affected") val nPartitions = opts.options.valueOf(opts.partitionsOpt).intValue val replicaAssignmentStr = opts.options.valueOf(opts.replicaAssignmentOpt) - AdminUtils.addPartitions(zkClient, topic, nPartitions, replicaAssignmentStr) + AdminUtils.addPartitions(zkClient, topic, nPartitions, replicaAssignmentStr, config = configs) println("Adding partitions succeeded!") } } } - def deleteTopic(zkClient: ZkClient, opts: TopicCommandOptions) { + def listTopics(zkClient: ZkClient, opts: TopicCommandOptions) { val topics = getTopics(zkClient, opts) - topics.foreach { topic => - AdminUtils.deleteTopic(zkClient, topic) - println("Topic \"%s\" queued for deletion.".format(topic)) + for(topic <- topics) { + if (ZkUtils.pathExists(zkClient,ZkUtils.getDeleteTopicPath(topic))) { + println("%s - marked for deletion".format(topic)) + } else { + println(topic) + } } } - - def listTopics(zkClient: ZkClient, opts: TopicCommandOptions) { + + def deleteTopic(zkClient: ZkClient, opts: TopicCommandOptions) { val topics = getTopics(zkClient, opts) - for(topic <- topics) - println(topic) + if (topics.length == 0) { + println("Topic %s does not exist".format(opts.options.valueOf(opts.topicOpt))) + } + topics.foreach { topic => + try { + ZkUtils.createPersistentPath(zkClient, ZkUtils.getDeleteTopicPath(topic)) + println("Topic %s is marked for deletion.".format(topic)) + println("Note: This will have no impact if delete.topic.enable is not set to true.") + } catch { + case e: ZkNodeExistsException => + println("Topic %s is already marked for deletion.".format(topic)) + case e2: Throwable => + throw new AdminOperationException("Error while deleting topic %s".format(topic)) + } + } } - + def describeTopic(zkClient: ZkClient, opts: TopicCommandOptions) { val topics = getTopics(zkClient, opts) val reportUnderReplicatedPartitions = if (opts.options.has(opts.reportUnderReplicatedPartitionsOpt)) true else false @@ -170,7 +197,7 @@ object TopicCommand { } } - def formatBroker(broker: Broker) = broker.id + " (" + broker.host + ":" + broker.port + ")" + def formatBroker(broker: Broker) = broker.id + " (" + formatAddress(broker.host, 
broker.port) + ")" def parseTopicConfigsToBeAdded(opts: TopicCommandOptions): Properties = { val configsToBeAdded = opts.options.valuesOf(opts.configOpt).map(_.split("""\s*=\s*""")) @@ -199,6 +226,9 @@ object TopicCommand { val ret = new mutable.HashMap[Int, List[Int]]() for (i <- 0 until partitionList.size) { val brokerList = partitionList(i).split(":").map(s => s.trim().toInt) + val duplicateBrokers = Utils.duplicates(brokerList) + if (duplicateBrokers.nonEmpty) + throw new AdminCommandFailedException("Partition replica lists may not contain duplicate entries: %s".format(duplicateBrokers.mkString(","))) ret.put(i, brokerList.toList) if (ret(i).size != ret(0).size) throw new AdminOperationException("Partition " + i + " has different replication factor: " + brokerList) @@ -215,20 +245,23 @@ object TopicCommand { .ofType(classOf[String]) val listOpt = parser.accepts("list", "List all available topics.") val createOpt = parser.accepts("create", "Create a new topic.") + val deleteOpt = parser.accepts("delete", "Delete a topic") val alterOpt = parser.accepts("alter", "Alter the configuration for the topic.") - val deleteOpt = parser.accepts("delete", "Delete the topic.") val describeOpt = parser.accepts("describe", "List details for the given topics.") val helpOpt = parser.accepts("help", "Print usage information.") - val topicOpt = parser.accepts("topic", "The topic to be create, alter, delete, or describe. Can also accept a regular " + + val topicOpt = parser.accepts("topic", "The topic to be create, alter or describe. Can also accept a regular " + "expression except for --create option") .withRequiredArg .describedAs("topic") .ofType(classOf[String]) - val configOpt = parser.accepts("config", "A topic configuration override for the topic being created or altered.") + val nl = System.getProperty("line.separator") + val configOpt = parser.accepts("config", "A topic configuration override for the topic being created or altered." + + "The following is a list of valid configurations: " + nl + LogConfig.configNames.map("\t" + _).mkString(nl) + nl + + "See the Kafka documentation for full details on the topic configs.") .withRequiredArg .describedAs("name=value") .ofType(classOf[String]) - val deleteConfigOpt = parser.accepts("deleteConfig", "A topic configuration override to be removed for an existing topic") + val deleteConfigOpt = parser.accepts("delete-config", "A topic configuration override to be removed for an existing topic (see the list of configurations under the --config option).") .withRequiredArg .describedAs("name") .ofType(classOf[String]) @@ -255,7 +288,7 @@ object TopicCommand { val options = parser.parse(args : _*) - val allTopicLevelOpts: Set[OptionSpec[_]] = Set(alterOpt, createOpt, deleteOpt, describeOpt, listOpt) + val allTopicLevelOpts: Set[OptionSpec[_]] = Set(alterOpt, createOpt, describeOpt, listOpt) def checkArgs() { // check required args diff --git a/core/src/main/scala/kafka/api/ConsumerMetadataRequest.scala b/core/src/main/scala/kafka/api/ConsumerMetadataRequest.scala new file mode 100644 index 0000000000000..6d00ed090d76c --- /dev/null +++ b/core/src/main/scala/kafka/api/ConsumerMetadataRequest.scala @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
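The TopicCommand hunk a few lines above (and the ReassignPartitionsCommand changes earlier in this patch) reject input containing repeated entries by passing each list through a `duplicates` helper and failing if the result is non-empty. A rough sketch of such a check, written here for illustration rather than taken from `kafka.utils.Utils`:

    // Illustration only: return every element that occurs more than once.
    def duplicates[T](items: Seq[T]): Iterable[T] =
      items.groupBy(identity).collect {
        case (item, occurrences) if occurrences.size > 1 => item
      }

For example, `duplicates(Seq(1, 2, 2, 3))` yields `2`, which the commands above then join with `mkString(",")` into the error message.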
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.api + +import java.nio.ByteBuffer +import kafka.network.{BoundedByteBufferSend, RequestChannel} +import kafka.network.RequestChannel.Response +import kafka.common.ErrorMapping + +object ConsumerMetadataRequest { + val CurrentVersion = 0.shortValue + val DefaultClientId = "" + + def readFrom(buffer: ByteBuffer) = { + // envelope + val versionId = buffer.getShort + val correlationId = buffer.getInt + val clientId = ApiUtils.readShortString(buffer) + + // request + val group = ApiUtils.readShortString(buffer) + ConsumerMetadataRequest(group, versionId, correlationId, clientId) + } + +} + +case class ConsumerMetadataRequest(group: String, + versionId: Short = ConsumerMetadataRequest.CurrentVersion, + correlationId: Int = 0, + clientId: String = ConsumerMetadataRequest.DefaultClientId) + extends RequestOrResponse(Some(RequestKeys.ConsumerMetadataKey)) { + + def sizeInBytes = + 2 + /* versionId */ + 4 + /* correlationId */ + ApiUtils.shortStringLength(clientId) + + ApiUtils.shortStringLength(group) + + def writeTo(buffer: ByteBuffer) { + // envelope + buffer.putShort(versionId) + buffer.putInt(correlationId) + ApiUtils.writeShortString(buffer, clientId) + + // consumer metadata request + ApiUtils.writeShortString(buffer, group) + } + + override def handleError(e: Throwable, requestChannel: RequestChannel, request: RequestChannel.Request): Unit = { + // return ConsumerCoordinatorNotAvailable for all uncaught errors + val errorResponse = ConsumerMetadataResponse(None, ErrorMapping.ConsumerCoordinatorNotAvailableCode) + requestChannel.sendResponse(new Response(request, new BoundedByteBufferSend(errorResponse))) + } + + def describe(details: Boolean) = { + val consumerMetadataRequest = new StringBuilder + consumerMetadataRequest.append("Name: " + this.getClass.getSimpleName) + consumerMetadataRequest.append("; Version: " + versionId) + consumerMetadataRequest.append("; CorrelationId: " + correlationId) + consumerMetadataRequest.append("; ClientId: " + clientId) + consumerMetadataRequest.append("; Group: " + group) + consumerMetadataRequest.toString() + } +} \ No newline at end of file diff --git a/core/src/main/scala/kafka/api/ConsumerMetadataResponse.scala b/core/src/main/scala/kafka/api/ConsumerMetadataResponse.scala new file mode 100644 index 0000000000000..84f60178f6eba --- /dev/null +++ b/core/src/main/scala/kafka/api/ConsumerMetadataResponse.scala @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.api + +import java.nio.ByteBuffer +import kafka.cluster.Broker +import kafka.common.ErrorMapping + +object ConsumerMetadataResponse { + val CurrentVersion = 0 + + private val NoBrokerOpt = Some(Broker(id = -1, host = "", port = -1)) + + def readFrom(buffer: ByteBuffer) = { + val correlationId = buffer.getInt + val errorCode = buffer.getShort + val broker = Broker.readFrom(buffer) + val coordinatorOpt = if (errorCode == ErrorMapping.NoError) + Some(broker) + else + None + + ConsumerMetadataResponse(coordinatorOpt, errorCode, correlationId) + } + +} + +case class ConsumerMetadataResponse (coordinatorOpt: Option[Broker], errorCode: Short, correlationId: Int = 0) + extends RequestOrResponse() { + + def sizeInBytes = + 4 + /* correlationId */ + 2 + /* error code */ + coordinatorOpt.orElse(ConsumerMetadataResponse.NoBrokerOpt).get.sizeInBytes + + def writeTo(buffer: ByteBuffer) { + buffer.putInt(correlationId) + buffer.putShort(errorCode) + coordinatorOpt.orElse(ConsumerMetadataResponse.NoBrokerOpt).foreach(_.writeTo(buffer)) + } + + def describe(details: Boolean) = toString +} \ No newline at end of file diff --git a/core/src/main/scala/kafka/api/ControlledShutdownRequest.scala b/core/src/main/scala/kafka/api/ControlledShutdownRequest.scala index 7dacb20237880..5be393ab8272a 100644 --- a/core/src/main/scala/kafka/api/ControlledShutdownRequest.scala +++ b/core/src/main/scala/kafka/api/ControlledShutdownRequest.scala @@ -38,9 +38,9 @@ object ControlledShutdownRequest extends Logging { } case class ControlledShutdownRequest(val versionId: Short, - override val correlationId: Int, + val correlationId: Int, val brokerId: Int) - extends RequestOrResponse(Some(RequestKeys.ControlledShutdownKey), correlationId){ + extends RequestOrResponse(Some(RequestKeys.ControlledShutdownKey)){ def this(correlationId: Int, brokerId: Int) = this(ControlledShutdownRequest.CurrentVersion, correlationId, brokerId) diff --git a/core/src/main/scala/kafka/api/ControlledShutdownResponse.scala b/core/src/main/scala/kafka/api/ControlledShutdownResponse.scala index 46ec3db28f88b..5e0a1cf4f407a 100644 --- a/core/src/main/scala/kafka/api/ControlledShutdownResponse.scala +++ b/core/src/main/scala/kafka/api/ControlledShutdownResponse.scala @@ -39,10 +39,10 @@ object ControlledShutdownResponse { } -case class ControlledShutdownResponse(override val correlationId: Int, +case class ControlledShutdownResponse(val correlationId: Int, val errorCode: Short = ErrorMapping.NoError, val partitionsRemaining: Set[TopicAndPartition]) - extends RequestOrResponse(correlationId = correlationId) { + extends RequestOrResponse() { def sizeInBytes(): Int ={ var size = 4 /* correlation id */ + diff --git a/core/src/main/scala/kafka/api/FetchRequest.scala b/core/src/main/scala/kafka/api/FetchRequest.scala index dea118a2e3792..b038c15186c0c 100644 --- a/core/src/main/scala/kafka/api/FetchRequest.scala +++ b/core/src/main/scala/kafka/api/FetchRequest.scala @@ -17,20 +17,19 @@ package kafka.api -import java.nio.ByteBuffer import kafka.utils.nonthreadsafe import kafka.api.ApiUtils._ -import scala.collection.immutable.Map 
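The new `ConsumerMetadataRequest` and `ConsumerMetadataResponse` classes added above define a small wire format: version, correlation id, client id, and group for the request; correlation id, error code, and coordinator broker for the response. Assuming the classes compile exactly as shown in the diff, a round trip through a `ByteBuffer` is a convenient sanity check (the group and client id values below are purely illustrative):

    import java.nio.ByteBuffer
    import kafka.api.ConsumerMetadataRequest

    // Serialize the request into a buffer sized by its own sizeInBytes,
    // then parse it back and compare.
    val request = ConsumerMetadataRequest(group = "example-group",
                                          correlationId = 1,
                                          clientId = "example-client")
    val buffer = ByteBuffer.allocate(request.sizeInBytes)
    request.writeTo(buffer)
    buffer.rewind()
    assert(ConsumerMetadataRequest.readFrom(buffer) == request)

The same pattern applies to the response, except that `readFrom` drops the coordinator broker whenever the error code is not `ErrorMapping.NoError`.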
import kafka.common.{ErrorMapping, TopicAndPartition} import kafka.consumer.ConsumerConfig -import java.util.concurrent.atomic.AtomicInteger import kafka.network.RequestChannel import kafka.message.MessageSet +import java.util.concurrent.atomic.AtomicInteger +import java.nio.ByteBuffer +import scala.collection.immutable.Map case class PartitionFetchInfo(offset: Long, fetchSize: Int) - object FetchRequest { val CurrentVersion = 0.shortValue val DefaultMaxWait = 0 @@ -59,14 +58,14 @@ object FetchRequest { } } -case class FetchRequest private[kafka] (versionId: Short = FetchRequest.CurrentVersion, - override val correlationId: Int = FetchRequest.DefaultCorrelationId, - clientId: String = ConsumerConfig.DefaultClientId, - replicaId: Int = Request.OrdinaryConsumerId, - maxWait: Int = FetchRequest.DefaultMaxWait, - minBytes: Int = FetchRequest.DefaultMinBytes, - requestInfo: Map[TopicAndPartition, PartitionFetchInfo]) - extends RequestOrResponse(Some(RequestKeys.FetchKey), correlationId) { +case class FetchRequest(versionId: Short = FetchRequest.CurrentVersion, + correlationId: Int = FetchRequest.DefaultCorrelationId, + clientId: String = ConsumerConfig.DefaultClientId, + replicaId: Int = Request.OrdinaryConsumerId, + maxWait: Int = FetchRequest.DefaultMaxWait, + minBytes: Int = FetchRequest.DefaultMinBytes, + requestInfo: Map[TopicAndPartition, PartitionFetchInfo]) + extends RequestOrResponse(Some(RequestKeys.FetchKey)) { /** * Partitions the request info into a map of maps (one for each topic). @@ -132,7 +131,7 @@ case class FetchRequest private[kafka] (versionId: Short = FetchRequest.CurrentV }) } - def isFromFollower = Request.isReplicaIdFromFollower(replicaId) + def isFromFollower = Request.isValidBrokerId(replicaId) def isFromOrdinaryConsumer = replicaId == Request.OrdinaryConsumerId diff --git a/core/src/main/scala/kafka/api/FetchResponse.scala b/core/src/main/scala/kafka/api/FetchResponse.scala index d117f10f724b0..75aaf57fb76ec 100644 --- a/core/src/main/scala/kafka/api/FetchResponse.scala +++ b/core/src/main/scala/kafka/api/FetchResponse.scala @@ -19,11 +19,14 @@ package kafka.api import java.nio.ByteBuffer import java.nio.channels.GatheringByteChannel + import kafka.common.{TopicAndPartition, ErrorMapping} import kafka.message.{MessageSet, ByteBufferMessageSet} import kafka.network.{MultiSend, Send} import kafka.api.ApiUtils._ +import scala.collection._ + object FetchResponsePartitionData { def readFrom(buffer: ByteBuffer): FetchResponsePartitionData = { val error = buffer.getShort @@ -149,9 +152,8 @@ object FetchResponse { } } - -case class FetchResponse(correlationId: Int, - data: Map[TopicAndPartition, FetchResponsePartitionData]) { +case class FetchResponse(correlationId: Int, data: Map[TopicAndPartition, FetchResponsePartitionData]) + extends RequestOrResponse() { /** * Partitions the data into a map of maps (one for each topic). @@ -167,6 +169,16 @@ case class FetchResponse(correlationId: Int, folded + topicData.sizeInBytes }) + /* + * FetchResponse uses [sendfile](http://man7.org/linux/man-pages/man2/sendfile.2.html) + * api for data transfer through the FetchResponseSend, so `writeTo` aren't actually being used. + * It is implemented as an empty function to conform to `RequestOrResponse.writeTo` + * abstract method signature. 
+ */ + def writeTo(buffer: ByteBuffer): Unit = throw new UnsupportedOperationException + + override def describe(details: Boolean): String = toString + private def partitionDataFor(topic: String, partition: Int): FetchResponsePartitionData = { val topicAndPartition = TopicAndPartition(topic, partition) data.get(topicAndPartition) match { diff --git a/core/src/main/scala/kafka/api/GenericRequestAndHeader.scala b/core/src/main/scala/kafka/api/GenericRequestAndHeader.scala new file mode 100644 index 0000000000000..f40e19f4b2ad7 --- /dev/null +++ b/core/src/main/scala/kafka/api/GenericRequestAndHeader.scala @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package kafka.api + +import java.nio.ByteBuffer +import org.apache.kafka.common.requests.AbstractRequestResponse +import kafka.api.ApiUtils._ + +private[kafka] abstract class GenericRequestAndHeader(val versionId: Short, + val correlationId: Int, + val clientId: String, + val body: AbstractRequestResponse, + val name: String, + override val requestId: Option[Short] = None) + extends RequestOrResponse(requestId) { + + def writeTo(buffer: ByteBuffer) { + buffer.putShort(versionId) + buffer.putInt(correlationId) + writeShortString(buffer, clientId) + body.writeTo(buffer) + } + + def sizeInBytes(): Int = { + 2 /* version id */ + + 4 /* correlation id */ + + (2 + clientId.length) /* client id */ + + body.sizeOf(); + } + + override def toString(): String = { + describe(true) + } + + override def describe(details: Boolean): String = { + val strBuffer = new StringBuilder + strBuffer.append("Name: " + name) + strBuffer.append("; Version: " + versionId) + strBuffer.append("; CorrelationId: " + correlationId) + strBuffer.append("; ClientId: " + clientId) + strBuffer.append("; Body: " + body.toString) + strBuffer.toString() + } +} \ No newline at end of file diff --git a/core/src/main/scala/kafka/api/GenericResponseAndHeader.scala b/core/src/main/scala/kafka/api/GenericResponseAndHeader.scala new file mode 100644 index 0000000000000..a4879e26b5362 --- /dev/null +++ b/core/src/main/scala/kafka/api/GenericResponseAndHeader.scala @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package kafka.api + +import java.nio.ByteBuffer +import org.apache.kafka.common.requests.AbstractRequestResponse + +private[kafka] abstract class GenericResponseAndHeader(val correlationId: Int, + val body: AbstractRequestResponse, + val name: String, + override val requestId: Option[Short] = None) + extends RequestOrResponse(requestId) { + + def writeTo(buffer: ByteBuffer) { + buffer.putInt(correlationId) + body.writeTo(buffer) + } + + def sizeInBytes(): Int = { + 4 /* correlation id */ + + body.sizeOf(); + } + + override def toString(): String = { + describe(true) + } + + override def describe(details: Boolean): String = { + val strBuffer = new StringBuilder + strBuffer.append("Name: " + name) + strBuffer.append("; CorrelationId: " + correlationId) + strBuffer.append("; Body: " + body.toString) + strBuffer.toString() + } +} \ No newline at end of file diff --git a/core/src/main/scala/kafka/api/HeartbeatRequestAndHeader.scala b/core/src/main/scala/kafka/api/HeartbeatRequestAndHeader.scala new file mode 100644 index 0000000000000..f168d9fc99ce5 --- /dev/null +++ b/core/src/main/scala/kafka/api/HeartbeatRequestAndHeader.scala @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ + +package kafka.api + +import java.nio.ByteBuffer +import kafka.network.{BoundedByteBufferSend, RequestChannel} +import kafka.common.ErrorMapping +import org.apache.kafka.common.requests.{HeartbeatResponse, HeartbeatRequest} +import kafka.api.ApiUtils._ +import kafka.network.RequestChannel.Response +import scala.Some + +object HeartbeatRequestAndHeader { + def readFrom(buffer: ByteBuffer): HeartbeatRequestAndHeader = { + val versionId = buffer.getShort + val correlationId = buffer.getInt + val clientId = readShortString(buffer) + val body = HeartbeatRequest.parse(buffer) + new HeartbeatRequestAndHeader(versionId, correlationId, clientId, body) + } +} + +case class HeartbeatRequestAndHeader(override val versionId: Short, + override val correlationId: Int, + override val clientId: String, + override val body: HeartbeatRequest) + extends GenericRequestAndHeader(versionId, correlationId, clientId, body, RequestKeys.nameForKey(RequestKeys.HeartbeatKey), Some(RequestKeys.HeartbeatKey)) { + + override def handleError(e: Throwable, requestChannel: RequestChannel, request: RequestChannel.Request): Unit = { + val errorResponseBody = new HeartbeatResponse(ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]])) + val errorHeartBeatResponseAndHeader = new HeartbeatResponseAndHeader(correlationId, errorResponseBody) + requestChannel.sendResponse(new Response(request, new BoundedByteBufferSend(errorHeartBeatResponseAndHeader))) + } +} diff --git a/core/src/main/scala/kafka/api/HeartbeatResponseAndHeader.scala b/core/src/main/scala/kafka/api/HeartbeatResponseAndHeader.scala new file mode 100644 index 0000000000000..9a71faae3138a --- /dev/null +++ b/core/src/main/scala/kafka/api/HeartbeatResponseAndHeader.scala @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package kafka.api + +import org.apache.kafka.common.requests.HeartbeatResponse +import java.nio.ByteBuffer + +object HeartbeatResponseAndHeader { + def readFrom(buffer: ByteBuffer): HeartbeatResponseAndHeader = { + val correlationId = buffer.getInt + val body = HeartbeatResponse.parse(buffer) + new HeartbeatResponseAndHeader(correlationId, body) + } +} + +case class HeartbeatResponseAndHeader(override val correlationId: Int, override val body: HeartbeatResponse) + extends GenericResponseAndHeader(correlationId, body, RequestKeys.nameForKey(RequestKeys.HeartbeatKey), None) { +} diff --git a/core/src/main/scala/kafka/api/JoinGroupRequestAndHeader.scala b/core/src/main/scala/kafka/api/JoinGroupRequestAndHeader.scala new file mode 100644 index 0000000000000..3651e8603dd0e --- /dev/null +++ b/core/src/main/scala/kafka/api/JoinGroupRequestAndHeader.scala @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. 
See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package kafka.api + +import java.nio.ByteBuffer +import kafka.network.{BoundedByteBufferSend, RequestChannel} +import kafka.common.ErrorMapping +import org.apache.kafka.common.requests._ +import kafka.api.ApiUtils._ +import kafka.network.RequestChannel.Response +import scala.Some + +object JoinGroupRequestAndHeader { + def readFrom(buffer: ByteBuffer): JoinGroupRequestAndHeader = { + val versionId = buffer.getShort + val correlationId = buffer.getInt + val clientId = readShortString(buffer) + val body = JoinGroupRequest.parse(buffer) + new JoinGroupRequestAndHeader(versionId, correlationId, clientId, body) + } +} + +case class JoinGroupRequestAndHeader(override val versionId: Short, + override val correlationId: Int, + override val clientId: String, + override val body: JoinGroupRequest) + extends GenericRequestAndHeader(versionId, correlationId, clientId, body, RequestKeys.nameForKey(RequestKeys.JoinGroupKey), Some(RequestKeys.JoinGroupKey)) { + + override def handleError(e: Throwable, requestChannel: RequestChannel, request: RequestChannel.Request): Unit = { + val errorResponseBody = new JoinGroupResponse(ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]])) + val errorHeartBeatResponseAndHeader = new JoinGroupResponseAndHeader(correlationId, errorResponseBody) + requestChannel.sendResponse(new Response(request, new BoundedByteBufferSend(errorHeartBeatResponseAndHeader))) + } +} diff --git a/core/src/main/scala/kafka/api/JoinGroupResponseAndHeader.scala b/core/src/main/scala/kafka/api/JoinGroupResponseAndHeader.scala new file mode 100644 index 0000000000000..d0f07e0cbbdac --- /dev/null +++ b/core/src/main/scala/kafka/api/JoinGroupResponseAndHeader.scala @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ +package kafka.api + +import org.apache.kafka.common.requests.JoinGroupResponse +import java.nio.ByteBuffer + +object JoinGroupResponseAndHeader { + def readFrom(buffer: ByteBuffer): JoinGroupResponseAndHeader = { + val correlationId = buffer.getInt + val body = JoinGroupResponse.parse(buffer) + new JoinGroupResponseAndHeader(correlationId, body) + } +} + +case class JoinGroupResponseAndHeader(override val correlationId: Int, override val body: JoinGroupResponse) + extends GenericResponseAndHeader(correlationId, body, RequestKeys.nameForKey(RequestKeys.JoinGroupKey), None) { +} diff --git a/core/src/main/scala/kafka/api/LeaderAndIsrRequest.scala b/core/src/main/scala/kafka/api/LeaderAndIsrRequest.scala index 03117377e1cb2..4ff7e8f8cc695 100644 --- a/core/src/main/scala/kafka/api/LeaderAndIsrRequest.scala +++ b/core/src/main/scala/kafka/api/LeaderAndIsrRequest.scala @@ -32,6 +32,8 @@ import collection.Set object LeaderAndIsr { val initialLeaderEpoch: Int = 0 val initialZKVersion: Int = 0 + val NoLeader = -1 + val LeaderDuringDelete = -2 } case class LeaderAndIsr(var leader: Int, var leaderEpoch: Int, var isr: List[Int], var zkVersion: Int) { @@ -127,13 +129,13 @@ object LeaderAndIsrRequest { } case class LeaderAndIsrRequest (versionId: Short, - override val correlationId: Int, + correlationId: Int, clientId: String, controllerId: Int, controllerEpoch: Int, partitionStateInfos: Map[(String, Int), PartitionStateInfo], leaders: Set[Broker]) - extends RequestOrResponse(Some(RequestKeys.LeaderAndIsrKey), correlationId) { + extends RequestOrResponse(Some(RequestKeys.LeaderAndIsrKey)) { def this(partitionStateInfos: Map[(String, Int), PartitionStateInfo], leaders: Set[Broker], controllerId: Int, controllerEpoch: Int, correlationId: Int, clientId: String) = { diff --git a/core/src/main/scala/kafka/api/LeaderAndIsrResponse.scala b/core/src/main/scala/kafka/api/LeaderAndIsrResponse.scala index f63644448bb5a..22ce48a0cc0ab 100644 --- a/core/src/main/scala/kafka/api/LeaderAndIsrResponse.scala +++ b/core/src/main/scala/kafka/api/LeaderAndIsrResponse.scala @@ -41,10 +41,10 @@ object LeaderAndIsrResponse { } -case class LeaderAndIsrResponse(override val correlationId: Int, +case class LeaderAndIsrResponse(correlationId: Int, responseMap: Map[(String, Int), Short], errorCode: Short = ErrorMapping.NoError) - extends RequestOrResponse(correlationId = correlationId) { + extends RequestOrResponse() { def sizeInBytes(): Int ={ var size = 4 /* correlation id */ + diff --git a/core/src/main/scala/kafka/api/OffsetCommitRequest.scala b/core/src/main/scala/kafka/api/OffsetCommitRequest.scala index 4d1fa5cbfde92..050615c72efe7 100644 --- a/core/src/main/scala/kafka/api/OffsetCommitRequest.scala +++ b/core/src/main/scala/kafka/api/OffsetCommitRequest.scala @@ -18,24 +18,39 @@ package kafka.api import java.nio.ByteBuffer - import kafka.api.ApiUtils._ -import kafka.utils.Logging +import kafka.utils.{SystemTime, Logging} import kafka.network.{RequestChannel, BoundedByteBufferSend} -import kafka.common.{ErrorMapping, TopicAndPartition, OffsetMetadataAndError} +import kafka.common.{OffsetAndMetadata, ErrorMapping, TopicAndPartition} import kafka.network.RequestChannel.Response +import scala.collection._ + object OffsetCommitRequest extends Logging { - val CurrentVersion: Short = 0 + val CurrentVersion: Short = 1 val DefaultClientId = "" def readFrom(buffer: ByteBuffer): OffsetCommitRequest = { + val now = SystemTime.milliseconds + // Read values from the envelope val versionId = buffer.getShort + assert(versionId == 0 
|| versionId == 1, + "Version " + versionId + " is invalid for OffsetCommitRequest. Valid versions are 0 or 1.") + val correlationId = buffer.getInt val clientId = readShortString(buffer) // Read the OffsetRequest val consumerGroupId = readShortString(buffer) + + // version 1 specific fields + var groupGenerationId: Int = org.apache.kafka.common.requests.OffsetCommitRequest.DEFAULT_GENERATION_ID + var consumerId: String = org.apache.kafka.common.requests.OffsetCommitRequest.DEFAULT_CONSUMER_ID + if (versionId == 1) { + groupGenerationId = buffer.getInt + consumerId = readShortString(buffer) + } + val topicCount = buffer.getInt val pairs = (1 to topicCount).flatMap(_ => { val topic = readShortString(buffer) @@ -43,23 +58,32 @@ object OffsetCommitRequest extends Logging { (1 to partitionCount).map(_ => { val partitionId = buffer.getInt val offset = buffer.getLong + val timestamp = { + val given = buffer.getLong + if (given == -1L) now else given + } val metadata = readShortString(buffer) - (TopicAndPartition(topic, partitionId), OffsetMetadataAndError(offset, metadata)) + (TopicAndPartition(topic, partitionId), OffsetAndMetadata(offset, metadata, timestamp)) }) }) - OffsetCommitRequest(consumerGroupId, Map(pairs:_*), versionId, correlationId, clientId) + OffsetCommitRequest(consumerGroupId, immutable.Map(pairs:_*), versionId, correlationId, clientId, groupGenerationId, consumerId) } } case class OffsetCommitRequest(groupId: String, - requestInfo: Map[TopicAndPartition, OffsetMetadataAndError], + requestInfo: immutable.Map[TopicAndPartition, OffsetAndMetadata], versionId: Short = OffsetCommitRequest.CurrentVersion, - override val correlationId: Int = 0, - clientId: String = OffsetCommitRequest.DefaultClientId) - extends RequestOrResponse(Some(RequestKeys.OffsetCommitKey), correlationId) { + correlationId: Int = 0, + clientId: String = OffsetCommitRequest.DefaultClientId, + groupGenerationId: Int = org.apache.kafka.common.requests.OffsetCommitRequest.DEFAULT_GENERATION_ID, + consumerId: String = org.apache.kafka.common.requests.OffsetCommitRequest.DEFAULT_CONSUMER_ID) + extends RequestOrResponse(Some(RequestKeys.OffsetCommitKey)) { + + assert(versionId == 0 || versionId == 1, + "Version " + versionId + " is invalid for OffsetCommitRequest. 
Valid versions are 0 or 1.") lazy val requestInfoGroupedByTopic = requestInfo.groupBy(_._1.topic) - + def writeTo(buffer: ByteBuffer) { // Write envelope buffer.putShort(versionId) @@ -68,14 +92,21 @@ case class OffsetCommitRequest(groupId: String, // Write OffsetCommitRequest writeShortString(buffer, groupId) // consumer group + + // version 1 specific data + if (versionId == 1) { + buffer.putInt(groupGenerationId) + writeShortString(buffer, consumerId) + } buffer.putInt(requestInfoGroupedByTopic.size) // number of topics requestInfoGroupedByTopic.foreach( t1 => { // topic -> Map[TopicAndPartition, OffsetMetadataAndError] writeShortString(buffer, t1._1) // topic buffer.putInt(t1._2.size) // number of partitions for this topic t1._2.foreach( t2 => { - buffer.putInt(t2._1.partition) // partition - buffer.putLong(t2._2.offset) // offset - writeShortString(buffer, t2._2.metadata) // metadata + buffer.putInt(t2._1.partition) + buffer.putLong(t2._2.offset) + buffer.putLong(t2._2.timestamp) + writeShortString(buffer, t2._2.metadata) }) }) } @@ -84,7 +115,8 @@ case class OffsetCommitRequest(groupId: String, 2 + /* versionId */ 4 + /* correlationId */ shortStringLength(clientId) + - shortStringLength(groupId) + + shortStringLength(groupId) + + (if (versionId == 1) 4 /* group generation id */ + shortStringLength(consumerId) else 0) + 4 + /* topic count */ requestInfoGroupedByTopic.foldLeft(0)((count, topicAndOffsets) => { val (topic, offsets) = topicAndOffsets @@ -95,16 +127,17 @@ case class OffsetCommitRequest(groupId: String, innerCount + 4 /* partition */ + 8 /* offset */ + + 8 /* timestamp */ + shortStringLength(offsetAndMetadata._2.metadata) }) }) override def handleError(e: Throwable, requestChannel: RequestChannel, request: RequestChannel.Request): Unit = { - val responseMap = requestInfo.map { - case (topicAndPartition, offset) => (topicAndPartition, ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]])) - }.toMap - val errorResponse = OffsetCommitResponse(requestInfo=responseMap, correlationId=correlationId) - requestChannel.sendResponse(new Response(request, new BoundedByteBufferSend(errorResponse))) + val errorCode = ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]]) + val commitStatus = requestInfo.mapValues(_ => errorCode) + val commitResponse = OffsetCommitResponse(commitStatus, correlationId) + + requestChannel.sendResponse(new Response(request, new BoundedByteBufferSend(commitResponse))) } override def describe(details: Boolean): String = { @@ -114,12 +147,14 @@ case class OffsetCommitRequest(groupId: String, offsetCommitRequest.append("; CorrelationId: " + correlationId) offsetCommitRequest.append("; ClientId: " + clientId) offsetCommitRequest.append("; GroupId: " + groupId) + offsetCommitRequest.append("; GroupGenerationId: " + groupGenerationId) + offsetCommitRequest.append("; ConsumerId: " + consumerId) if(details) offsetCommitRequest.append("; RequestInfo: " + requestInfo.mkString(",")) offsetCommitRequest.toString() } - override def toString(): String = { - describe(true) + override def toString = { + describe(details = true) } } diff --git a/core/src/main/scala/kafka/api/OffsetCommitResponse.scala b/core/src/main/scala/kafka/api/OffsetCommitResponse.scala index 9e1795f9db15e..624a1c1cc5406 100644 --- a/core/src/main/scala/kafka/api/OffsetCommitResponse.scala +++ b/core/src/main/scala/kafka/api/OffsetCommitResponse.scala @@ -19,9 +19,8 @@ package kafka.api import java.nio.ByteBuffer -import kafka.api.ApiUtils._ -import kafka.common.TopicAndPartition 
import kafka.utils.Logging +import kafka.common.TopicAndPartition object OffsetCommitResponse extends Logging { val CurrentVersion: Short = 0 @@ -30,7 +29,7 @@ object OffsetCommitResponse extends Logging { val correlationId = buffer.getInt val topicCount = buffer.getInt val pairs = (1 to topicCount).flatMap(_ => { - val topic = readShortString(buffer) + val topic = ApiUtils.readShortString(buffer) val partitionCount = buffer.getInt (1 to partitionCount).map(_ => { val partitionId = buffer.getInt @@ -42,37 +41,34 @@ object OffsetCommitResponse extends Logging { } } -case class OffsetCommitResponse(requestInfo: Map[TopicAndPartition, Short], - override val correlationId: Int = 0) - extends RequestOrResponse(correlationId=correlationId) { +case class OffsetCommitResponse(commitStatus: Map[TopicAndPartition, Short], + correlationId: Int = 0) + extends RequestOrResponse() { - lazy val requestInfoGroupedByTopic = requestInfo.groupBy(_._1.topic) + lazy val commitStatusGroupedByTopic = commitStatus.groupBy(_._1.topic) def writeTo(buffer: ByteBuffer) { buffer.putInt(correlationId) - buffer.putInt(requestInfoGroupedByTopic.size) // number of topics - requestInfoGroupedByTopic.foreach( t1 => { // topic -> Map[TopicAndPartition, Short] - writeShortString(buffer, t1._1) // topic - buffer.putInt(t1._2.size) // number of partitions for this topic - t1._2.foreach( t2 => { // TopicAndPartition -> Short - buffer.putInt(t2._1.partition) - buffer.putShort(t2._2) //error - }) - }) + buffer.putInt(commitStatusGroupedByTopic.size) + commitStatusGroupedByTopic.foreach { case(topic, statusMap) => + ApiUtils.writeShortString(buffer, topic) + buffer.putInt(statusMap.size) // partition count + statusMap.foreach { case(topicAndPartition, errorCode) => + buffer.putInt(topicAndPartition.partition) + buffer.putShort(errorCode) + } + } } override def sizeInBytes = 4 + /* correlationId */ 4 + /* topic count */ - requestInfoGroupedByTopic.foldLeft(0)((count, topicAndOffsets) => { - val (topic, offsets) = topicAndOffsets + commitStatusGroupedByTopic.foldLeft(0)((count, partitionStatusMap) => { + val (topic, partitionStatus) = partitionStatusMap count + - shortStringLength(topic) + /* topic */ - 4 + /* number of partitions */ - offsets.size * ( - 4 + /* partition */ - 2 /* error */ - ) + ApiUtils.shortStringLength(topic) + + 4 + /* partition count */ + partitionStatus.size * ( 4 /* partition */ + 2 /* error code */) }) override def describe(details: Boolean):String = { toString } diff --git a/core/src/main/scala/kafka/api/OffsetFetchRequest.scala b/core/src/main/scala/kafka/api/OffsetFetchRequest.scala index 7036532db1405..c7604b9cdeb8f 100644 --- a/core/src/main/scala/kafka/api/OffsetFetchRequest.scala +++ b/core/src/main/scala/kafka/api/OffsetFetchRequest.scala @@ -23,7 +23,7 @@ import kafka.api.ApiUtils._ import kafka.utils.Logging import kafka.network.{BoundedByteBufferSend, RequestChannel} import kafka.network.RequestChannel.Response -import kafka.common.{ErrorMapping, OffsetMetadataAndError, TopicAndPartition} +import kafka.common.{OffsetAndMetadata, ErrorMapping, OffsetMetadataAndError, TopicAndPartition} object OffsetFetchRequest extends Logging { val CurrentVersion: Short = 0 val DefaultClientId = "" @@ -50,11 +50,11 @@ object OffsetFetchRequest extends Logging { } case class OffsetFetchRequest(groupId: String, - requestInfo: Seq[TopicAndPartition], - versionId: Short = OffsetFetchRequest.CurrentVersion, - override val correlationId: Int = 0, - clientId: String = OffsetFetchRequest.DefaultClientId) - extends 
RequestOrResponse(Some(RequestKeys.OffsetFetchKey), correlationId) { + requestInfo: Seq[TopicAndPartition], + versionId: Short = OffsetFetchRequest.CurrentVersion, + correlationId: Int = 0, + clientId: String = OffsetFetchRequest.DefaultClientId) + extends RequestOrResponse(Some(RequestKeys.OffsetFetchKey)) { lazy val requestInfoGroupedByTopic = requestInfo.groupBy(_.topic) @@ -91,8 +91,8 @@ case class OffsetFetchRequest(groupId: String, override def handleError(e: Throwable, requestChannel: RequestChannel, request: RequestChannel.Request): Unit = { val responseMap = requestInfo.map { case (topicAndPartition) => (topicAndPartition, OffsetMetadataAndError( - offset=OffsetMetadataAndError.InvalidOffset, - error=ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]]) + offset = OffsetAndMetadata.InvalidOffset, + error = ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]]) )) }.toMap val errorResponse = OffsetFetchResponse(requestInfo=responseMap, correlationId=correlationId) @@ -111,7 +111,7 @@ case class OffsetFetchRequest(groupId: String, offsetFetchRequest.toString() } - override def toString(): String = { - describe(true) + override def toString: String = { + describe(details = true) } -} \ No newline at end of file +} diff --git a/core/src/main/scala/kafka/api/OffsetFetchResponse.scala b/core/src/main/scala/kafka/api/OffsetFetchResponse.scala index c1222f422ddb6..e3523f8dcc028 100644 --- a/core/src/main/scala/kafka/api/OffsetFetchResponse.scala +++ b/core/src/main/scala/kafka/api/OffsetFetchResponse.scala @@ -45,8 +45,8 @@ object OffsetFetchResponse extends Logging { } case class OffsetFetchResponse(requestInfo: Map[TopicAndPartition, OffsetMetadataAndError], - override val correlationId: Int = 0) - extends RequestOrResponse(correlationId = correlationId) { + correlationId: Int = 0) + extends RequestOrResponse() { lazy val requestInfoGroupedByTopic = requestInfo.groupBy(_._1.topic) diff --git a/core/src/main/scala/kafka/api/OffsetRequest.scala b/core/src/main/scala/kafka/api/OffsetRequest.scala index 7cbc26c6e3842..3d483bc7518ad 100644 --- a/core/src/main/scala/kafka/api/OffsetRequest.scala +++ b/core/src/main/scala/kafka/api/OffsetRequest.scala @@ -57,10 +57,10 @@ case class PartitionOffsetRequestInfo(time: Long, maxNumOffsets: Int) case class OffsetRequest(requestInfo: Map[TopicAndPartition, PartitionOffsetRequestInfo], versionId: Short = OffsetRequest.CurrentVersion, - override val correlationId: Int = 0, + correlationId: Int = 0, clientId: String = OffsetRequest.DefaultClientId, replicaId: Int = Request.OrdinaryConsumerId) - extends RequestOrResponse(Some(RequestKeys.OffsetsKey), correlationId) { + extends RequestOrResponse(Some(RequestKeys.OffsetsKey)) { def this(requestInfo: Map[TopicAndPartition, PartitionOffsetRequestInfo], correlationId: Int, replicaId: Int) = this(requestInfo, OffsetRequest.CurrentVersion, correlationId, OffsetRequest.DefaultClientId, replicaId) diff --git a/core/src/main/scala/kafka/api/OffsetResponse.scala b/core/src/main/scala/kafka/api/OffsetResponse.scala index 0e1d6e362a1ce..63c0899ec46c4 100644 --- a/core/src/main/scala/kafka/api/OffsetResponse.scala +++ b/core/src/main/scala/kafka/api/OffsetResponse.scala @@ -51,9 +51,9 @@ case class PartitionOffsetsResponse(error: Short, offsets: Seq[Long]) { } -case class OffsetResponse(override val correlationId: Int, +case class OffsetResponse(correlationId: Int, partitionErrorAndOffsets: Map[TopicAndPartition, PartitionOffsetsResponse]) - extends RequestOrResponse(correlationId = correlationId) { 
+ extends RequestOrResponse() { lazy val offsetsGroupedByTopic = partitionErrorAndOffsets.groupBy(_._1.topic) diff --git a/core/src/main/scala/kafka/api/ProducerRequest.scala b/core/src/main/scala/kafka/api/ProducerRequest.scala index 0c295a2fe6712..570b2da1d8650 100644 --- a/core/src/main/scala/kafka/api/ProducerRequest.scala +++ b/core/src/main/scala/kafka/api/ProducerRequest.scala @@ -53,12 +53,12 @@ object ProducerRequest { } case class ProducerRequest(versionId: Short = ProducerRequest.CurrentVersion, - override val correlationId: Int, + correlationId: Int, clientId: String, requiredAcks: Short, ackTimeoutMs: Int, data: collection.mutable.Map[TopicAndPartition, ByteBufferMessageSet]) - extends RequestOrResponse(Some(RequestKeys.ProduceKey), correlationId) { + extends RequestOrResponse(Some(RequestKeys.ProduceKey)) { /** * Partitions the data into a map of maps (one for each topic). @@ -153,7 +153,6 @@ case class ProducerRequest(versionId: Short = ProducerRequest.CurrentVersion, producerRequest.toString() } - def emptyData(){ data.clear() } diff --git a/core/src/main/scala/kafka/api/ProducerResponse.scala b/core/src/main/scala/kafka/api/ProducerResponse.scala index 06261b9136399..5d1fac4cb8943 100644 --- a/core/src/main/scala/kafka/api/ProducerResponse.scala +++ b/core/src/main/scala/kafka/api/ProducerResponse.scala @@ -41,11 +41,10 @@ object ProducerResponse { } } -case class ProducerResponseStatus(error: Short, offset: Long) +case class ProducerResponseStatus(var error: Short, offset: Long) -case class ProducerResponse(override val correlationId: Int, - status: Map[TopicAndPartition, ProducerResponseStatus]) - extends RequestOrResponse(correlationId = correlationId) { +case class ProducerResponse(correlationId: Int, status: Map[TopicAndPartition, ProducerResponseStatus]) + extends RequestOrResponse() { /** * Partitions the status map into a map of maps (one for each topic). 
diff --git a/core/src/main/scala/kafka/api/RequestKeys.scala b/core/src/main/scala/kafka/api/RequestKeys.scala index c81214fec0bf7..c24c0345feedc 100644 --- a/core/src/main/scala/kafka/api/RequestKeys.scala +++ b/core/src/main/scala/kafka/api/RequestKeys.scala @@ -31,6 +31,9 @@ object RequestKeys { val ControlledShutdownKey: Short = 7 val OffsetCommitKey: Short = 8 val OffsetFetchKey: Short = 9 + val ConsumerMetadataKey: Short = 10 + val JoinGroupKey: Short = 11 + val HeartbeatKey: Short = 12 val keyToNameAndDeserializerMap: Map[Short, (String, (ByteBuffer) => RequestOrResponse)]= Map(ProduceKey -> ("Produce", ProducerRequest.readFrom), @@ -42,7 +45,11 @@ object RequestKeys { UpdateMetadataKey -> ("UpdateMetadata", UpdateMetadataRequest.readFrom), ControlledShutdownKey -> ("ControlledShutdown", ControlledShutdownRequest.readFrom), OffsetCommitKey -> ("OffsetCommit", OffsetCommitRequest.readFrom), - OffsetFetchKey -> ("OffsetFetch", OffsetFetchRequest.readFrom)) + OffsetFetchKey -> ("OffsetFetch", OffsetFetchRequest.readFrom), + ConsumerMetadataKey -> ("ConsumerMetadata", ConsumerMetadataRequest.readFrom), + JoinGroupKey -> ("JoinGroup", JoinGroupRequestAndHeader.readFrom), + HeartbeatKey -> ("Heartbeat", HeartbeatRequestAndHeader.readFrom) + ) def nameForKey(key: Short): String = { keyToNameAndDeserializerMap.get(key) match { diff --git a/core/src/main/scala/kafka/api/RequestOrResponse.scala b/core/src/main/scala/kafka/api/RequestOrResponse.scala index 708e547a358c9..73ec1d9fb811c 100644 --- a/core/src/main/scala/kafka/api/RequestOrResponse.scala +++ b/core/src/main/scala/kafka/api/RequestOrResponse.scala @@ -25,12 +25,12 @@ object Request { val OrdinaryConsumerId: Int = -1 val DebuggingConsumerId: Int = -2 - // Followers use broker id as the replica id, which are non-negative int. - def isReplicaIdFromFollower(replicaId: Int): Boolean = (replicaId >= 0) + // Broker ids are non-negative int. 
+ def isValidBrokerId(brokerId: Int): Boolean = (brokerId >= 0) } -private[kafka] abstract class RequestOrResponse(val requestId: Option[Short] = None, val correlationId: Int) extends Logging { +abstract class RequestOrResponse(val requestId: Option[Short] = None) extends Logging { def sizeInBytes: Int diff --git a/core/src/main/scala/kafka/api/StopReplicaRequest.scala b/core/src/main/scala/kafka/api/StopReplicaRequest.scala index 68fc1389ee711..5e14987c990fe 100644 --- a/core/src/main/scala/kafka/api/StopReplicaRequest.scala +++ b/core/src/main/scala/kafka/api/StopReplicaRequest.scala @@ -54,13 +54,13 @@ object StopReplicaRequest extends Logging { } case class StopReplicaRequest(versionId: Short, - override val correlationId: Int, + correlationId: Int, clientId: String, controllerId: Int, controllerEpoch: Int, deletePartitions: Boolean, partitions: Set[TopicAndPartition]) - extends RequestOrResponse(Some(RequestKeys.StopReplicaKey), correlationId) { + extends RequestOrResponse(Some(RequestKeys.StopReplicaKey)) { def this(deletePartitions: Boolean, partitions: Set[TopicAndPartition], controllerId: Int, controllerEpoch: Int, correlationId: Int) = { this(StopReplicaRequest.CurrentVersion, correlationId, StopReplicaRequest.DefaultClientId, diff --git a/core/src/main/scala/kafka/api/StopReplicaResponse.scala b/core/src/main/scala/kafka/api/StopReplicaResponse.scala index c90ddee3d8204..3431f3f65d1ae 100644 --- a/core/src/main/scala/kafka/api/StopReplicaResponse.scala +++ b/core/src/main/scala/kafka/api/StopReplicaResponse.scala @@ -42,10 +42,10 @@ object StopReplicaResponse { } -case class StopReplicaResponse(override val correlationId: Int, +case class StopReplicaResponse(val correlationId: Int, val responseMap: Map[TopicAndPartition, Short], val errorCode: Short = ErrorMapping.NoError) - extends RequestOrResponse(correlationId = correlationId) { + extends RequestOrResponse() { def sizeInBytes(): Int ={ var size = 4 /* correlation id */ + diff --git a/core/src/main/scala/kafka/api/TopicMetadata.scala b/core/src/main/scala/kafka/api/TopicMetadata.scala index 0513a59ed94e5..0190076df0adf 100644 --- a/core/src/main/scala/kafka/api/TopicMetadata.scala +++ b/core/src/main/scala/kafka/api/TopicMetadata.scala @@ -21,8 +21,8 @@ import kafka.cluster.Broker import java.nio.ByteBuffer import kafka.api.ApiUtils._ import kafka.utils.Logging -import collection.mutable.ArrayBuffer import kafka.common._ +import org.apache.kafka.common.utils.Utils._ object TopicMetadata { @@ -32,9 +32,11 @@ object TopicMetadata { val errorCode = readShortInRange(buffer, "error code", (-1, Short.MaxValue)) val topic = readShortString(buffer) val numPartitions = readIntInRange(buffer, "number of partitions", (0, Int.MaxValue)) - val partitionsMetadata = new ArrayBuffer[PartitionMetadata]() - for(i <- 0 until numPartitions) - partitionsMetadata += PartitionMetadata.readFrom(buffer, brokers) + val partitionsMetadata: Array[PartitionMetadata] = new Array[PartitionMetadata](numPartitions) + for(i <- 0 until numPartitions) { + val partitionMetadata = PartitionMetadata.readFrom(buffer, brokers) + partitionsMetadata(partitionMetadata.partitionId) = partitionMetadata + } new TopicMetadata(topic, partitionsMetadata, errorCode) } } @@ -147,7 +149,7 @@ case class PartitionMetadata(partitionId: Int, partitionMetadataString.toString() } - private def formatBroker(broker: Broker) = broker.id + " (" + broker.host + ":" + broker.port + ")" + private def formatBroker(broker: Broker) = broker.id + " (" + formatAddress(broker.host, broker.port) + 
")" } diff --git a/core/src/main/scala/kafka/api/TopicMetadataRequest.scala b/core/src/main/scala/kafka/api/TopicMetadataRequest.scala index a319f2f438bfd..7dca09ce637a4 100644 --- a/core/src/main/scala/kafka/api/TopicMetadataRequest.scala +++ b/core/src/main/scala/kafka/api/TopicMetadataRequest.scala @@ -47,10 +47,10 @@ object TopicMetadataRequest extends Logging { } case class TopicMetadataRequest(val versionId: Short, - override val correlationId: Int, + val correlationId: Int, val clientId: String, val topics: Seq[String]) - extends RequestOrResponse(Some(RequestKeys.MetadataKey), correlationId){ + extends RequestOrResponse(Some(RequestKeys.MetadataKey)){ def this(topics: Seq[String], correlationId: Int) = this(TopicMetadataRequest.CurrentVersion, correlationId, TopicMetadataRequest.DefaultClientId, topics) @@ -79,7 +79,7 @@ case class TopicMetadataRequest(val versionId: Short, val topicMetadata = topics.map { topic => TopicMetadata(topic, Nil, ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]])) } - val errorResponse = TopicMetadataResponse(topicMetadata, correlationId) + val errorResponse = TopicMetadataResponse(Seq(), topicMetadata, correlationId) requestChannel.sendResponse(new Response(request, new BoundedByteBufferSend(errorResponse))) } diff --git a/core/src/main/scala/kafka/api/TopicMetadataResponse.scala b/core/src/main/scala/kafka/api/TopicMetadataResponse.scala index f6b7429faeab3..92ac4e687be22 100644 --- a/core/src/main/scala/kafka/api/TopicMetadataResponse.scala +++ b/core/src/main/scala/kafka/api/TopicMetadataResponse.scala @@ -29,34 +29,27 @@ object TopicMetadataResponse { val brokerMap = brokers.map(b => (b.id, b)).toMap val topicCount = buffer.getInt val topicsMetadata = (0 until topicCount).map(_ => TopicMetadata.readFrom(buffer, brokerMap)) - new TopicMetadataResponse(topicsMetadata, correlationId) + new TopicMetadataResponse(brokers, topicsMetadata, correlationId) } } -case class TopicMetadataResponse(topicsMetadata: Seq[TopicMetadata], - override val correlationId: Int) - extends RequestOrResponse(correlationId = correlationId) { +case class TopicMetadataResponse(brokers: Seq[Broker], + topicsMetadata: Seq[TopicMetadata], + correlationId: Int) + extends RequestOrResponse() { val sizeInBytes: Int = { - val brokers = extractBrokers(topicsMetadata).values 4 + 4 + brokers.map(_.sizeInBytes).sum + 4 + topicsMetadata.map(_.sizeInBytes).sum } def writeTo(buffer: ByteBuffer) { buffer.putInt(correlationId) /* brokers */ - val brokers = extractBrokers(topicsMetadata).values buffer.putInt(brokers.size) brokers.foreach(_.writeTo(buffer)) /* topic metadata */ buffer.putInt(topicsMetadata.length) topicsMetadata.foreach(_.writeTo(buffer)) } - - def extractBrokers(topicMetadatas: Seq[TopicMetadata]): Map[Int, Broker] = { - val parts = topicsMetadata.flatMap(_.partitionsMetadata) - val brokers = (parts.flatMap(_.replicas)) ++ (parts.map(_.leader).collect{case Some(l) => l}) - brokers.map(b => (b.id, b)).toMap - } override def describe(details: Boolean):String = { toString } } diff --git a/core/src/main/scala/kafka/api/UpdateMetadataRequest.scala b/core/src/main/scala/kafka/api/UpdateMetadataRequest.scala index 543e262b25a94..530982e36b179 100644 --- a/core/src/main/scala/kafka/api/UpdateMetadataRequest.scala +++ b/core/src/main/scala/kafka/api/UpdateMetadataRequest.scala @@ -55,13 +55,13 @@ object UpdateMetadataRequest { } case class UpdateMetadataRequest (versionId: Short, - override val correlationId: Int, + correlationId: Int, clientId: String, controllerId: Int, 
controllerEpoch: Int, partitionStateInfos: Map[TopicAndPartition, PartitionStateInfo], aliveBrokers: Set[Broker]) - extends RequestOrResponse(Some(RequestKeys.UpdateMetadataKey), correlationId) { + extends RequestOrResponse(Some(RequestKeys.UpdateMetadataKey)) { def this(controllerId: Int, controllerEpoch: Int, correlationId: Int, clientId: String, partitionStateInfos: Map[TopicAndPartition, PartitionStateInfo], aliveBrokers: Set[Broker]) = { diff --git a/core/src/main/scala/kafka/api/UpdateMetadataResponse.scala b/core/src/main/scala/kafka/api/UpdateMetadataResponse.scala index c583c1f00c89a..53f606752055a 100644 --- a/core/src/main/scala/kafka/api/UpdateMetadataResponse.scala +++ b/core/src/main/scala/kafka/api/UpdateMetadataResponse.scala @@ -32,9 +32,9 @@ object UpdateMetadataResponse { } } -case class UpdateMetadataResponse(override val correlationId: Int, +case class UpdateMetadataResponse(correlationId: Int, errorCode: Short = ErrorMapping.NoError) - extends RequestOrResponse(correlationId = correlationId) { + extends RequestOrResponse() { def sizeInBytes(): Int = 4 /* correlation id */ + 2 /* error code */ def writeTo(buffer: ByteBuffer) { diff --git a/core/src/main/scala/kafka/client/ClientUtils.scala b/core/src/main/scala/kafka/client/ClientUtils.scala index 1d2f81be4f980..ebba87f056668 100644 --- a/core/src/main/scala/kafka/client/ClientUtils.scala +++ b/core/src/main/scala/kafka/client/ClientUtils.scala @@ -20,12 +20,17 @@ import scala.collection._ import kafka.cluster._ import kafka.api._ import kafka.producer._ -import kafka.common.KafkaException +import kafka.common.{ErrorMapping, KafkaException} import kafka.utils.{Utils, Logging} import java.util.Properties import util.Random + import kafka.network.BlockingChannel + import kafka.utils.ZkUtils._ + import org.I0Itec.zkclient.ZkClient + import java.io.IOException +import org.apache.kafka.common.utils.Utils.{getHost, getPort} -/** + /** * Helper functions common to clients (producer, consumer, or admin) */ object ClientUtils extends Logging{ @@ -81,7 +86,7 @@ object ClientUtils extends Logging{ def fetchTopicMetadata(topics: Set[String], brokers: Seq[Broker], clientId: String, timeoutMs: Int, correlationId: Int = 0): TopicMetadataResponse = { val props = new Properties() - props.put("metadata.broker.list", brokers.map(_.getConnectionString()).mkString(",")) + props.put("metadata.broker.list", brokers.map(_.connectionString).mkString(",")) props.put("client.id", clientId) props.put("request.timeout.ms", timeoutMs.toString) val producerConfig = new ProducerConfig(props) @@ -94,14 +99,102 @@ object ClientUtils extends Logging{ def parseBrokerList(brokerListStr: String): Seq[Broker] = { val brokersStr = Utils.parseCsvList(brokerListStr) - brokersStr.zipWithIndex.map(b =>{ - val brokerStr = b._1 - val brokerId = b._2 - val brokerInfos = brokerStr.split(":") - val hostName = brokerInfos(0) - val port = brokerInfos(1).toInt - new Broker(brokerId, hostName, port) - }) + brokersStr.zipWithIndex.map { case (address, brokerId) => + new Broker(brokerId, getHost(address), getPort(address)) + } } - -} \ No newline at end of file + + /** + * Creates a blocking channel to a random broker + */ + def channelToAnyBroker(zkClient: ZkClient, socketTimeoutMs: Int = 3000) : BlockingChannel = { + var channel: BlockingChannel = null + var connected = false + while (!connected) { + val allBrokers = getAllBrokersInCluster(zkClient) + Random.shuffle(allBrokers).find { broker => + trace("Connecting to broker %s:%d.".format(broker.host, broker.port)) + try 
{ + channel = new BlockingChannel(broker.host, broker.port, BlockingChannel.UseDefaultBufferSize, BlockingChannel.UseDefaultBufferSize, socketTimeoutMs) + channel.connect() + debug("Created channel to broker %s:%d.".format(channel.host, channel.port)) + true + } catch { + case e: Exception => + if (channel != null) channel.disconnect() + channel = null + info("Error while creating channel to %s:%d.".format(broker.host, broker.port)) + false + } + } + connected = if (channel == null) false else true + } + + channel + } + + /** + * Creates a blocking channel to the offset manager of the given group + */ + def channelToOffsetManager(group: String, zkClient: ZkClient, socketTimeoutMs: Int = 3000, retryBackOffMs: Int = 1000) = { + var queryChannel = channelToAnyBroker(zkClient) + + var offsetManagerChannelOpt: Option[BlockingChannel] = None + + while (!offsetManagerChannelOpt.isDefined) { + + var coordinatorOpt: Option[Broker] = None + + while (!coordinatorOpt.isDefined) { + try { + if (!queryChannel.isConnected) + queryChannel = channelToAnyBroker(zkClient) + debug("Querying %s:%d to locate offset manager for %s.".format(queryChannel.host, queryChannel.port, group)) + queryChannel.send(ConsumerMetadataRequest(group)) + val response = queryChannel.receive() + val consumerMetadataResponse = ConsumerMetadataResponse.readFrom(response.buffer) + debug("Consumer metadata response: " + consumerMetadataResponse.toString) + if (consumerMetadataResponse.errorCode == ErrorMapping.NoError) + coordinatorOpt = consumerMetadataResponse.coordinatorOpt + else { + debug("Query to %s:%d to locate offset manager for %s failed - will retry in %d milliseconds." + .format(queryChannel.host, queryChannel.port, group, retryBackOffMs)) + Thread.sleep(retryBackOffMs) + } + } + catch { + case ioe: IOException => + info("Failed to fetch consumer metadata from %s:%d.".format(queryChannel.host, queryChannel.port)) + queryChannel.disconnect() + } + } + + val coordinator = coordinatorOpt.get + if (coordinator.host == queryChannel.host && coordinator.port == queryChannel.port) { + offsetManagerChannelOpt = Some(queryChannel) + } else { + val connectString = "%s:%d".format(coordinator.host, coordinator.port) + var offsetManagerChannel: BlockingChannel = null + try { + debug("Connecting to offset manager %s.".format(connectString)) + offsetManagerChannel = new BlockingChannel(coordinator.host, coordinator.port, + BlockingChannel.UseDefaultBufferSize, + BlockingChannel.UseDefaultBufferSize, + socketTimeoutMs) + offsetManagerChannel.connect() + offsetManagerChannelOpt = Some(offsetManagerChannel) + queryChannel.disconnect() + } + catch { + case ioe: IOException => // offsets manager may have moved + info("Error while connecting to %s.".format(connectString)) + if (offsetManagerChannel != null) offsetManagerChannel.disconnect() + Thread.sleep(retryBackOffMs) + offsetManagerChannelOpt = None // just in case someone decides to change shutdownChannel to not swallow exceptions + } + } + } + + offsetManagerChannelOpt.get + } + } diff --git a/core/src/main/scala/kafka/cluster/Broker.scala b/core/src/main/scala/kafka/cluster/Broker.scala index 9407ed21fbbd5..0060add008bb3 100644 --- a/core/src/main/scala/kafka/cluster/Broker.scala +++ b/core/src/main/scala/kafka/cluster/Broker.scala @@ -22,11 +22,12 @@ import kafka.utils.Json import kafka.api.ApiUtils._ import java.nio.ByteBuffer import kafka.common.{KafkaException, BrokerNotAvailableException} +import org.apache.kafka.common.utils.Utils._ /** * A Kafka broker */ -private[kafka] object 
Broker { +object Broker { def createBroker(id: Int, brokerInfoString: String): Broker = { if(brokerInfoString == null) @@ -54,11 +55,11 @@ private[kafka] object Broker { } } -private[kafka] case class Broker(val id: Int, val host: String, val port: Int) { +case class Broker(id: Int, host: String, port: Int) { - override def toString(): String = new String("id:" + id + ",host:" + host + ",port:" + port) + override def toString: String = "id:" + id + ",host:" + host + ",port:" + port - def getConnectionString(): String = host + ":" + port + def connectionString: String = formatAddress(host, port) def writeTo(buffer: ByteBuffer) { buffer.putInt(id) diff --git a/core/src/main/scala/kafka/cluster/Partition.scala b/core/src/main/scala/kafka/cluster/Partition.scala index 1087a2e91c86e..b230e9a1fb1a3 100644 --- a/core/src/main/scala/kafka/cluster/Partition.scala +++ b/core/src/main/scala/kafka/cluster/Partition.scala @@ -16,20 +16,22 @@ */ package kafka.cluster -import scala.collection._ -import kafka.admin.AdminUtils +import kafka.common._ import kafka.utils._ -import java.lang.Object +import kafka.utils.Utils.{inReadLock,inWriteLock} +import kafka.admin.AdminUtils import kafka.api.{PartitionStateInfo, LeaderAndIsr} import kafka.log.LogConfig -import kafka.server.ReplicaManager -import com.yammer.metrics.core.Gauge +import kafka.server.{TopicPartitionOperationKey, LogOffsetMetadata, OffsetManager, ReplicaManager} import kafka.metrics.KafkaMetricsGroup import kafka.controller.KafkaController -import org.apache.log4j.Logger import kafka.message.ByteBufferMessageSet -import kafka.common.{NotAssignedReplicaException, TopicAndPartition, NotLeaderForPartitionException, ErrorMapping} + import java.io.IOException +import java.util.concurrent.locks.ReentrantReadWriteLock +import scala.collection.immutable.Set + +import com.yammer.metrics.core.Gauge /** @@ -37,18 +39,18 @@ import java.io.IOException */ class Partition(val topic: String, val partitionId: Int, - var replicationFactor: Int, time: Time, - val replicaManager: ReplicaManager) extends Logging with KafkaMetricsGroup { + replicaManager: ReplicaManager) extends Logging with KafkaMetricsGroup { private val localBrokerId = replicaManager.config.brokerId private val logManager = replicaManager.logManager private val zkClient = replicaManager.zkClient - var leaderReplicaIdOpt: Option[Int] = None - var inSyncReplicas: Set[Replica] = Set.empty[Replica] - private val assignedReplicaMap = new Pool[Int,Replica] - private val leaderIsrUpdateLock = new Object + private val assignedReplicaMap = new Pool[Int, Replica] + // The read lock is only required when multiple reads are executed and needs to be in a consistent manner + private val leaderIsrUpdateLock = new ReentrantReadWriteLock() private var zkVersion: Int = LeaderAndIsr.initialZKVersion - private var leaderEpoch: Int = LeaderAndIsr.initialLeaderEpoch - 1 + @volatile private var leaderEpoch: Int = LeaderAndIsr.initialLeaderEpoch - 1 + @volatile var leaderReplicaIdOpt: Option[Int] = None + @volatile var inSyncReplicas: Set[Replica] = Set.empty[Replica] /* Epoch of the controller that last changed the leader. This needs to be initialized correctly upon broker startup. * One way of doing that is through the controller's start replica state change command. When a new broker starts up * the controller sends it a start replica command containing the leader for each partition that the broker hosts. @@ -56,27 +58,24 @@ class Partition(val topic: String, * each partition. 
*/ private var controllerEpoch: Int = KafkaController.InitialControllerEpoch - 1 this.logIdent = "Partition [%s,%d] on broker %d: ".format(topic, partitionId, localBrokerId) - private val stateChangeLogger = Logger.getLogger(KafkaController.stateChangeLogger) private def isReplicaLocal(replicaId: Int) : Boolean = (replicaId == localBrokerId) - newGauge( - topic + "-" + partitionId + "-UnderReplicated", + newGauge("UnderReplicated", new Gauge[Int] { def value = { if (isUnderReplicated) 1 else 0 } - } + }, + Map("topic" -> topic, "partition" -> partitionId.toString) ) def isUnderReplicated(): Boolean = { - leaderIsrUpdateLock synchronized { - leaderReplicaIfLocal() match { - case Some(_) => - inSyncReplicas.size < assignedReplicas.size - case None => - false - } + leaderReplicaIfLocal() match { + case Some(_) => + inSyncReplicas.size < assignedReplicas.size + case None => + false } } @@ -88,7 +87,7 @@ class Partition(val topic: String, if (isReplicaLocal(replicaId)) { val config = LogConfig.fromProps(logManager.defaultConfig.toProps, AdminUtils.fetchTopicConfig(zkClient, topic)) val log = logManager.createLog(TopicAndPartition(topic, partitionId), config) - val checkpoint = replicaManager.highWatermarkCheckpoints(log.dir.getParent) + val checkpoint = replicaManager.highWatermarkCheckpoints(log.dir.getParentFile.getAbsolutePath) val offsetMap = checkpoint.read if (!offsetMap.contains(TopicAndPartition(topic, partitionId))) warn("No checkpointed highwatermark is found for partition [%s,%d]".format(topic, partitionId)) @@ -112,15 +111,13 @@ class Partition(val topic: String, } def leaderReplicaIfLocal(): Option[Replica] = { - leaderIsrUpdateLock synchronized { - leaderReplicaIdOpt match { - case Some(leaderReplicaId) => - if (leaderReplicaId == localBrokerId) - getReplica(localBrokerId) - else - None - case None => None - } + leaderReplicaIdOpt match { + case Some(leaderReplicaId) => + if (leaderReplicaId == localBrokerId) + getReplica(localBrokerId) + else + None + case None => None } } @@ -138,7 +135,7 @@ class Partition(val topic: String, def delete() { // need to hold the lock to prevent appendMessagesToLeader() from hitting I/O exceptions due to log being deleted - leaderIsrUpdateLock synchronized { + inWriteLock(leaderIsrUpdateLock) { assignedReplicaMap.clear() inSyncReplicas = Set.empty[Replica] leaderReplicaIdOpt = None @@ -153,9 +150,7 @@ class Partition(val topic: String, } def getLeaderEpoch(): Int = { - leaderIsrUpdateLock synchronized { - return this.leaderEpoch - } + return this.leaderEpoch } /** @@ -163,8 +158,9 @@ class Partition(val topic: String, * and setting the new leader and ISR */ def makeLeader(controllerId: Int, - partitionStateInfo: PartitionStateInfo, correlationId: Int): Boolean = { - leaderIsrUpdateLock synchronized { + partitionStateInfo: PartitionStateInfo, correlationId: Int, + offsetManager: OffsetManager): Boolean = { + inWriteLock(leaderIsrUpdateLock) { val allReplicas = partitionStateInfo.allReplicas val leaderIsrAndControllerEpoch = partitionStateInfo.leaderIsrAndControllerEpoch val leaderAndIsr = leaderIsrAndControllerEpoch.leaderAndIsr @@ -176,25 +172,31 @@ class Partition(val topic: String, val newInSyncReplicas = leaderAndIsr.isr.map(r => getOrCreateReplica(r)).toSet // remove assigned replicas that have been removed by the controller (assignedReplicas().map(_.brokerId) -- allReplicas).foreach(removeReplica(_)) - // reset LogEndOffset for remote replicas - assignedReplicas.foreach(r => if (r.brokerId != localBrokerId) r.logEndOffset = 
ReplicaManager.UnknownLogEndOffset) inSyncReplicas = newInSyncReplicas leaderEpoch = leaderAndIsr.leaderEpoch zkVersion = leaderAndIsr.zkVersion leaderReplicaIdOpt = Some(localBrokerId) + // construct the high watermark metadata for the new leader replica + val newLeaderReplica = getReplica().get + newLeaderReplica.convertHWToLocalOffsetMetadata() + // reset log end offset for remote replicas + assignedReplicas.foreach(r => if (r.brokerId != localBrokerId) r.logEndOffset = LogOffsetMetadata.UnknownOffsetMetadata) // we may need to increment high watermark since ISR could be down to 1 - maybeIncrementLeaderHW(getReplica().get) + maybeIncrementLeaderHW(newLeaderReplica) + if (topic == OffsetManager.OffsetsTopicName) + offsetManager.loadOffsetsFromLog(partitionId) true } } /** * Make the local replica the follower by setting the new leader and ISR to empty + * If the leader replica id does not change, return false to indicate the replica manager */ def makeFollower(controllerId: Int, partitionStateInfo: PartitionStateInfo, - leaders: Set[Broker], correlationId: Int): Boolean = { - leaderIsrUpdateLock synchronized { + correlationId: Int, offsetManager: OffsetManager): Boolean = { + inWriteLock(leaderIsrUpdateLock) { val allReplicas = partitionStateInfo.allReplicas val leaderIsrAndControllerEpoch = partitionStateInfo.leaderIsrAndControllerEpoch val leaderAndIsr = leaderIsrAndControllerEpoch.leaderAndIsr @@ -202,39 +204,59 @@ class Partition(val topic: String, // record the epoch of the controller that made the leadership decision. This is useful while updating the isr // to maintain the decision maker controller's epoch in the zookeeper path controllerEpoch = leaderIsrAndControllerEpoch.controllerEpoch - // TODO: Delete leaders from LeaderAndIsrRequest in 0.8.1 - leaders.find(_.id == newLeaderBrokerId) match { - case Some(leaderBroker) => - // add replicas that are new - allReplicas.foreach(r => getOrCreateReplica(r)) - // remove assigned replicas that have been removed by the controller - (assignedReplicas().map(_.brokerId) -- allReplicas).foreach(removeReplica(_)) - inSyncReplicas = Set.empty[Replica] - leaderEpoch = leaderAndIsr.leaderEpoch - zkVersion = leaderAndIsr.zkVersion - leaderReplicaIdOpt = Some(newLeaderBrokerId) - case None => // we should not come here - stateChangeLogger.error(("Broker %d aborted the become-follower state change with correlation id %d from " + - "controller %d epoch %d for partition [%s,%d] new leader %d") - .format(localBrokerId, correlationId, controllerId, leaderIsrAndControllerEpoch.controllerEpoch, - topic, partitionId, newLeaderBrokerId)) + // add replicas that are new + allReplicas.foreach(r => getOrCreateReplica(r)) + // remove assigned replicas that have been removed by the controller + (assignedReplicas().map(_.brokerId) -- allReplicas).foreach(removeReplica(_)) + inSyncReplicas = Set.empty[Replica] + leaderEpoch = leaderAndIsr.leaderEpoch + zkVersion = leaderAndIsr.zkVersion + + leaderReplicaIdOpt.foreach { leaderReplica => + if (topic == OffsetManager.OffsetsTopicName && + /* if we are making a leader->follower transition */ + leaderReplica == localBrokerId) + offsetManager.clearOffsetsInPartition(partitionId) + } + + if (leaderReplicaIdOpt.isDefined && leaderReplicaIdOpt.get == newLeaderBrokerId) { + false + } + else { + leaderReplicaIdOpt = Some(newLeaderBrokerId) + true } - true } } - def updateLeaderHWAndMaybeExpandIsr(replicaId: Int, offset: Long) { - leaderIsrUpdateLock synchronized { - debug("Recording follower %d position %d for partition 
[%s,%d].".format(replicaId, offset, topic, partitionId)) - val replicaOpt = getReplica(replicaId) - if(!replicaOpt.isDefined) { - throw new NotAssignedReplicaException(("Leader %d failed to record follower %d's position %d for partition [%s,%d] since the replica %d" + + /** + * Update the log end offset of a certain replica of this partition + */ + def updateReplicaLEO(replicaId: Int, offset: LogOffsetMetadata) { + getReplica(replicaId) match { + case Some(replica) => + replica.logEndOffset = offset + + // check if we need to expand ISR to include this replica + // if it is not in the ISR yet + maybeExpandIsr(replicaId) + + debug("Recorded replica %d log end offset (LEO) position %d for partition %s." + .format(replicaId, offset.messageOffset, TopicAndPartition(topic, partitionId))) + case None => + throw new NotAssignedReplicaException(("Leader %d failed to record follower %d's position %d since the replica" + " is not recognized to be one of the assigned replicas %s for partition [%s,%d]").format(localBrokerId, replicaId, - offset, topic, partitionId, replicaId, assignedReplicas().map(_.brokerId).mkString(","), topic, partitionId)) - } - val replica = replicaOpt.get - replica.logEndOffset = offset + offset.messageOffset, assignedReplicas().map(_.brokerId).mkString(","), topic, partitionId)) + } + } + /** + * Check and maybe expand the ISR of the partition. + * + * This function can be triggered when a replica's LEO has incremented + */ + def maybeExpandIsr(replicaId: Int) { + inWriteLock(leaderIsrUpdateLock) { // check if this replica needs to be added to the ISR leaderReplicaIfLocal() match { case Some(leaderReplica) => @@ -243,8 +265,10 @@ class Partition(val topic: String, // For a replica to get added back to ISR, it has to satisfy 3 conditions- // 1. It is not already in the ISR // 2. It is part of the assigned replica list. See KAFKA-1097 - // 3. It's log end offset >= leader's highwatermark - if (!inSyncReplicas.contains(replica) && assignedReplicas.map(_.brokerId).contains(replicaId) && replica.logEndOffset >= leaderHW) { + // 3. It's log end offset >= leader's high watermark + if (!inSyncReplicas.contains(replica) && + assignedReplicas.map(_.brokerId).contains(replicaId) && + replica.logEndOffset.offsetDiff(leaderHW) >= 0) { // expand ISR val newInSyncReplicas = inSyncReplicas + replica info("Expanding ISR for partition [%s,%d] from %s to %s" @@ -253,58 +277,84 @@ class Partition(val topic: String, updateIsr(newInSyncReplicas) replicaManager.isrExpandRate.mark() } + // check if the HW of the partition can now be incremented + // since the replica maybe now be in the ISR and its LEO has just incremented maybeIncrementLeaderHW(leaderReplica) + case None => // nothing to do if no longer leader } } } def checkEnoughReplicasReachOffset(requiredOffset: Long, requiredAcks: Int): (Boolean, Short) = { - leaderIsrUpdateLock synchronized { - leaderReplicaIfLocal() match { - case Some(_) => - val numAcks = inSyncReplicas.count(r => { - if (!r.isLocal) - r.logEndOffset >= requiredOffset - else - true /* also count the local (leader) replica */ - }) - trace("%d/%d acks satisfied for %s-%d".format(numAcks, requiredAcks, topic, partitionId)) - if ((requiredAcks < 0 && numAcks >= inSyncReplicas.size) || - (requiredAcks > 0 && numAcks >= requiredAcks)) { - /* - * requiredAcks < 0 means acknowledge after all replicas in ISR - * are fully caught up to the (local) leader's offset - * corresponding to this produce request. 
- */ + leaderReplicaIfLocal() match { + case Some(leaderReplica) => + // keep the current immutable replica list reference + val curInSyncReplicas = inSyncReplicas + val numAcks = curInSyncReplicas.count(r => { + if (!r.isLocal) + r.logEndOffset.messageOffset >= requiredOffset + else + true /* also count the local (leader) replica */ + }) + val minIsr = leaderReplica.log.get.config.minInSyncReplicas + + trace("%d/%d acks satisfied for %s-%d".format(numAcks, requiredAcks, topic, partitionId)) + + if (requiredAcks < 0 && leaderReplica.highWatermark.messageOffset >= requiredOffset ) { + /* + * requiredAcks < 0 means acknowledge after all replicas in ISR + * are fully caught up to the (local) leader's offset + * corresponding to this produce request. + * + * minIsr means that the topic is configured not to accept messages + * if there are not enough replicas in ISR + * in this scenario the request was already appended locally and + * then added to the purgatory before the ISR was shrunk + */ + if (minIsr <= curInSyncReplicas.size) { (true, ErrorMapping.NoError) - } else - (false, ErrorMapping.NoError) - case None => - (false, ErrorMapping.NotLeaderForPartitionCode) - } + } else { + (true, ErrorMapping.NotEnoughReplicasAfterAppendCode) + } + } else if (requiredAcks > 0 && numAcks >= requiredAcks) { + (true, ErrorMapping.NoError) + } else + (false, ErrorMapping.NoError) + case None => + (false, ErrorMapping.NotLeaderForPartitionCode) } } /** - * There is no need to acquire the leaderIsrUpdate lock here since all callers of this private API acquire that lock - * @param leaderReplica + * Check and maybe increment the high watermark of the partition; + * this function can be triggered when + * + * 1. Partition ISR changed + * 2. Any replica's LEO changed + * + * Note There is no need to acquire the leaderIsrUpdate lock here + * since all callers of this private API acquire that lock */ private def maybeIncrementLeaderHW(leaderReplica: Replica) { val allLogEndOffsets = inSyncReplicas.map(_.logEndOffset) - val newHighWatermark = allLogEndOffsets.min + val newHighWatermark = allLogEndOffsets.min(new LogOffsetMetadata.OffsetOrdering) val oldHighWatermark = leaderReplica.highWatermark - if(newHighWatermark > oldHighWatermark) { + if(oldHighWatermark.precedes(newHighWatermark)) { leaderReplica.highWatermark = newHighWatermark - debug("Highwatermark for partition [%s,%d] updated to %d".format(topic, partitionId, newHighWatermark)) + debug("High watermark for partition [%s,%d] updated to %s".format(topic, partitionId, newHighWatermark)) + // some delayed operations may be unblocked after HW changed + val requestKey = new TopicPartitionOperationKey(this.topic, this.partitionId) + replicaManager.tryCompleteDelayedFetch(requestKey) + replicaManager.tryCompleteDelayedProduce(requestKey) + } else { + debug("Skipping update high watermark since Old hw %s is larger than new hw %s for partition [%s,%d]. All leo's are %s" + .format(oldHighWatermark, newHighWatermark, topic, partitionId, allLogEndOffsets.mkString(","))) } - else - debug("Old hw for partition [%s,%d] is %d. New hw is %d. 
All leo's are %s" - .format(topic, partitionId, oldHighWatermark, newHighWatermark, allLogEndOffsets.mkString(","))) } def maybeShrinkIsr(replicaMaxLagTimeMs: Long, replicaMaxLagMessages: Long) { - leaderIsrUpdateLock synchronized { + inWriteLock(leaderIsrUpdateLock) { leaderReplicaIfLocal() match { case Some(leaderReplica) => val outOfSyncReplicas = getOutOfSyncReplicas(leaderReplica, replicaMaxLagTimeMs, replicaMaxLagMessages) @@ -339,19 +389,32 @@ class Partition(val topic: String, if(stuckReplicas.size > 0) debug("Stuck replicas for partition [%s,%d] are %s".format(topic, partitionId, stuckReplicas.map(_.brokerId).mkString(","))) // Case 2 above - val slowReplicas = candidateReplicas.filter(r => r.logEndOffset >= 0 && (leaderLogEndOffset - r.logEndOffset) > keepInSyncMessages) + val slowReplicas = candidateReplicas.filter(r => + r.logEndOffset.messageOffset >= 0 && + leaderLogEndOffset.messageOffset - r.logEndOffset.messageOffset > keepInSyncMessages) if(slowReplicas.size > 0) debug("Slow replicas for partition [%s,%d] are %s".format(topic, partitionId, slowReplicas.map(_.brokerId).mkString(","))) stuckReplicas ++ slowReplicas } - def appendMessagesToLeader(messages: ByteBufferMessageSet) = { - leaderIsrUpdateLock synchronized { + def appendMessagesToLeader(messages: ByteBufferMessageSet, requiredAcks: Int = 0) = { + inReadLock(leaderIsrUpdateLock) { val leaderReplicaOpt = leaderReplicaIfLocal() leaderReplicaOpt match { case Some(leaderReplica) => val log = leaderReplica.log.get + val minIsr = log.config.minInSyncReplicas + val inSyncSize = inSyncReplicas.size + + // Avoid writing to leader if there are not enough insync replicas to make it safe + if (inSyncSize < minIsr && requiredAcks == -1) { + throw new NotEnoughReplicasException("Number of insync replicas for partition [%s,%d] is [%d], below required minimum [%d]" + .format(topic,partitionId,minIsr,inSyncSize)) + } + val info = log.append(messages, assignOffsets = true) + // probably unblock some follower fetch requests since log end offset has been updated + replicaManager.tryCompleteDelayedFetch(new TopicPartitionOperationKey(this.topic, this.partitionId)) // we may need to increment high watermark since ISR could be down to 1 maybeIncrementLeaderHW(leaderReplica) info @@ -363,13 +426,10 @@ class Partition(val topic: String, } private def updateIsr(newIsr: Set[Replica]) { - debug("Updated ISR for partition [%s,%d] to %s".format(topic, partitionId, newIsr.mkString(","))) val newLeaderAndIsr = new LeaderAndIsr(localBrokerId, leaderEpoch, newIsr.map(r => r.brokerId).toList, zkVersion) - // use the epoch of the controller that made the leadership decision, instead of the current controller epoch - val (updateSucceeded, newVersion) = ZkUtils.conditionalUpdatePersistentPath(zkClient, - ZkUtils.getTopicPartitionLeaderAndIsrPath(topic, partitionId), - ZkUtils.leaderAndIsrZkData(newLeaderAndIsr, controllerEpoch), zkVersion) - if (updateSucceeded){ + val (updateSucceeded,newVersion) = ReplicationUtils.updateLeaderAndIsr(zkClient, topic, partitionId, + newLeaderAndIsr, controllerEpoch, zkVersion) + if(updateSucceeded) { inSyncReplicas = newIsr zkVersion = newVersion trace("ISR updated to [%s] and zkVersion updated to [%d]".format(newIsr.mkString(","), zkVersion)) @@ -392,14 +452,12 @@ class Partition(val topic: String, } override def toString(): String = { - leaderIsrUpdateLock synchronized { - val partitionString = new StringBuilder - partitionString.append("Topic: " + topic) - partitionString.append("; Partition: " + partitionId) - 
partitionString.append("; Leader: " + leaderReplicaIdOpt) - partitionString.append("; AssignedReplicas: " + assignedReplicaMap.keys.mkString(",")) - partitionString.append("; InSyncReplicas: " + inSyncReplicas.map(_.brokerId).mkString(",")) - partitionString.toString() - } + val partitionString = new StringBuilder + partitionString.append("Topic: " + topic) + partitionString.append("; Partition: " + partitionId) + partitionString.append("; Leader: " + leaderReplicaIdOpt) + partitionString.append("; AssignedReplicas: " + assignedReplicaMap.keys.mkString(",")) + partitionString.append("; InSyncReplicas: " + inSyncReplicas.map(_.brokerId).mkString(",")) + partitionString.toString() } } diff --git a/core/src/main/scala/kafka/cluster/Replica.scala b/core/src/main/scala/kafka/cluster/Replica.scala index 5e659b4a5c025..bd13c20338ce3 100644 --- a/core/src/main/scala/kafka/cluster/Replica.scala +++ b/core/src/main/scala/kafka/cluster/Replica.scala @@ -19,8 +19,9 @@ package kafka.cluster import kafka.log.Log import kafka.utils.{SystemTime, Time, Logging} +import kafka.server.LogOffsetMetadata import kafka.common.KafkaException -import kafka.server.ReplicaManager + import java.util.concurrent.atomic.AtomicLong class Replica(val brokerId: Int, @@ -28,33 +29,17 @@ class Replica(val brokerId: Int, time: Time = SystemTime, initialHighWatermarkValue: Long = 0L, val log: Option[Log] = None) extends Logging { - //only defined in local replica - private[this] var highWatermarkValue: AtomicLong = new AtomicLong(initialHighWatermarkValue) - // only used for remote replica; logEndOffsetValue for local replica is kept in log - private[this] var logEndOffsetValue = new AtomicLong(ReplicaManager.UnknownLogEndOffset) - private[this] var logEndOffsetUpdateTimeMsValue: AtomicLong = new AtomicLong(time.milliseconds) + // the high watermark offset value, in non-leader replicas only its message offsets are kept + @volatile private[this] var highWatermarkMetadata: LogOffsetMetadata = new LogOffsetMetadata(initialHighWatermarkValue) + // the log end offset value, kept in all replicas; + // for local replica it is the log's end offset, for remote replicas its value is only updated by follower fetch + @volatile private[this] var logEndOffsetMetadata: LogOffsetMetadata = LogOffsetMetadata.UnknownOffsetMetadata + // the time when log offset is updated + private[this] val logEndOffsetUpdateTimeMsValue = new AtomicLong(time.milliseconds) + val topic = partition.topic val partitionId = partition.partitionId - def logEndOffset_=(newLogEndOffset: Long) { - if (!isLocal) { - logEndOffsetValue.set(newLogEndOffset) - logEndOffsetUpdateTimeMsValue.set(time.milliseconds) - trace("Setting log end offset for replica %d for partition [%s,%d] to %d" - .format(brokerId, topic, partitionId, logEndOffsetValue.get())) - } else - throw new KafkaException("Shouldn't set logEndOffset for replica %d partition [%s,%d] since it's local" - .format(brokerId, topic, partitionId)) - - } - - def logEndOffset = { - if (isLocal) - log.get.logEndOffset - else - logEndOffsetValue.get() - } - def isLocal: Boolean = { log match { case Some(l) => true @@ -62,24 +47,43 @@ class Replica(val brokerId: Int, } } - def logEndOffsetUpdateTimeMs = logEndOffsetUpdateTimeMsValue.get() - - def highWatermark_=(newHighWatermark: Long) { + def logEndOffset_=(newLogEndOffset: LogOffsetMetadata) { if (isLocal) { - trace("Setting hw for replica %d partition [%s,%d] on broker %d to %d" - .format(brokerId, topic, partitionId, brokerId, newHighWatermark)) - 
highWatermarkValue.set(newHighWatermark) - } else - throw new KafkaException("Unable to set highwatermark for replica %d partition [%s,%d] since it's not local" - .format(brokerId, topic, partitionId)) + throw new KafkaException("Should not set log end offset on partition [%s,%d]'s local replica %d".format(topic, partitionId, brokerId)) + } else { + logEndOffsetMetadata = newLogEndOffset + logEndOffsetUpdateTimeMsValue.set(time.milliseconds) + trace("Setting log end offset for replica %d for partition [%s,%d] to [%s]" + .format(brokerId, topic, partitionId, logEndOffsetMetadata)) + } } - def highWatermark = { + def logEndOffset = if (isLocal) - highWatermarkValue.get() + log.get.logEndOffsetMetadata else - throw new KafkaException("Unable to get highwatermark for replica %d partition [%s,%d] since it's not local" - .format(brokerId, topic, partitionId)) + logEndOffsetMetadata + + def logEndOffsetUpdateTimeMs = logEndOffsetUpdateTimeMsValue.get() + + def highWatermark_=(newHighWatermark: LogOffsetMetadata) { + if (isLocal) { + highWatermarkMetadata = newHighWatermark + trace("Setting high watermark for replica %d partition [%s,%d] on broker %d to [%s]" + .format(brokerId, topic, partitionId, brokerId, newHighWatermark)) + } else { + throw new KafkaException("Should not set high watermark on partition [%s,%d]'s non-local replica %d".format(topic, partitionId, brokerId)) + } + } + + def highWatermark = highWatermarkMetadata + + def convertHWToLocalOffsetMetadata() = { + if (isLocal) { + highWatermarkMetadata = log.get.convertToOffsetMetadata(highWatermarkMetadata.messageOffset) + } else { + throw new KafkaException("Should not construct complete high watermark on partition [%s,%d]'s non-local replica %d".format(topic, partitionId, brokerId)) + } } override def equals(that: Any): Boolean = { diff --git a/core/src/main/scala/kafka/common/AppInfo.scala b/core/src/main/scala/kafka/common/AppInfo.scala new file mode 100644 index 0000000000000..d642ca555f83c --- /dev/null +++ b/core/src/main/scala/kafka/common/AppInfo.scala @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package kafka.common + +import java.net.URL +import java.util.jar.{Attributes, Manifest} + +import com.yammer.metrics.core.Gauge +import kafka.metrics.KafkaMetricsGroup + +object AppInfo extends KafkaMetricsGroup { + private var isRegistered = false + private val lock = new Object() + + def registerInfo(): Unit = { + lock.synchronized { + if (isRegistered) { + return + } + } + + try { + val clazz = AppInfo.getClass + val className = clazz.getSimpleName + ".class" + val classPath = clazz.getResource(className).toString + if (!classPath.startsWith("jar")) { + // Class not from JAR + return + } + val manifestPath = classPath.substring(0, classPath.lastIndexOf("!") + 1) + "/META-INF/MANIFEST.MF" + + val mf = new Manifest + mf.read(new URL(manifestPath).openStream()) + val version = mf.getMainAttributes.get(new Attributes.Name("Version")).toString + + newGauge("Version", + new Gauge[String] { + def value = { + version + } + }) + + lock.synchronized { + isRegistered = true + } + } catch { + case e: Exception => + warn("Can't read Kafka version from MANIFEST.MF. Possible cause: %s".format(e)) + } + } +} diff --git a/core/src/main/scala/kafka/common/ClientIdAndBroker.scala b/core/src/main/scala/kafka/common/ClientIdAndBroker.scala index 93223a9c93b55..3b09041d33ac4 100644 --- a/core/src/main/scala/kafka/common/ClientIdAndBroker.scala +++ b/core/src/main/scala/kafka/common/ClientIdAndBroker.scala @@ -8,7 +8,7 @@ package kafka.common * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -21,6 +21,14 @@ package kafka.common * Convenience case class since (clientId, brokerInfo) pairs are used to create * SyncProducer Request Stats and SimpleConsumer Request and Response Stats. */ -case class ClientIdAndBroker(clientId: String, brokerInfo: String) { - override def toString = "%s-%s".format(clientId, brokerInfo) + +trait ClientIdBroker { +} + +case class ClientIdAndBroker(clientId: String, brokerHost: String, brokerPort: Int) extends ClientIdBroker { + override def toString = "%s-%s-%d".format(clientId, brokerHost, brokerPort) +} + +case class ClientIdAllBrokers(clientId: String) extends ClientIdBroker { + override def toString = "%s-%s".format(clientId, "AllBrokers") } diff --git a/core/src/main/scala/kafka/common/ClientIdAndTopic.scala b/core/src/main/scala/kafka/common/ClientIdAndTopic.scala index 7acf9e76bdd87..5825aad2c8d1a 100644 --- a/core/src/main/scala/kafka/common/ClientIdAndTopic.scala +++ b/core/src/main/scala/kafka/common/ClientIdAndTopic.scala @@ -1,5 +1,3 @@ -package kafka.common - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -17,11 +15,21 @@ package kafka.common * limitations under the License. */ +package kafka.common + /** * Convenience case class since (clientId, topic) pairs are used in the creation * of many Stats objects. 
*/ -case class ClientIdAndTopic(clientId: String, topic: String) { +trait ClientIdTopic { +} + +case class ClientIdAndTopic(clientId: String, topic: String) extends ClientIdTopic { override def toString = "%s-%s".format(clientId, topic) } +case class ClientIdAllTopics(clientId: String) extends ClientIdTopic { + override def toString = "%s-%s".format(clientId, "AllTopics") +} + + diff --git a/core/src/main/scala/kafka/common/ConsumerCoordinatorNotAvailableException.scala b/core/src/main/scala/kafka/common/ConsumerCoordinatorNotAvailableException.scala new file mode 100644 index 0000000000000..8e02d264e9447 --- /dev/null +++ b/core/src/main/scala/kafka/common/ConsumerCoordinatorNotAvailableException.scala @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.common + +class ConsumerCoordinatorNotAvailableException(message: String) extends RuntimeException(message) { + def this() = this(null) +} \ No newline at end of file diff --git a/core/src/main/scala/kafka/common/ErrorMapping.scala b/core/src/main/scala/kafka/common/ErrorMapping.scala index b0b5dcedc4dad..eedc2f5f21dd8 100644 --- a/core/src/main/scala/kafka/common/ErrorMapping.scala +++ b/core/src/main/scala/kafka/common/ErrorMapping.scala @@ -5,7 +5,7 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -19,11 +19,10 @@ package kafka.common import kafka.message.InvalidMessageException import java.nio.ByteBuffer -import java.lang.Throwable import scala.Predef._ /** - * A bi-directional mapping between error codes and exceptions x + * A bi-directional mapping between error codes and exceptions */ object ErrorMapping { val EmptyByteBuffer = ByteBuffer.allocate(0) @@ -43,8 +42,15 @@ object ErrorMapping { val StaleControllerEpochCode: Short = 11 val OffsetMetadataTooLargeCode: Short = 12 val StaleLeaderEpochCode: Short = 13 + val OffsetsLoadInProgressCode: Short = 14 + val ConsumerCoordinatorNotAvailableCode: Short = 15 + val NotCoordinatorForConsumerCode: Short = 16 + val InvalidTopicCode : Short = 17 + val MessageSetSizeTooLargeCode: Short = 18 + val NotEnoughReplicasCode : Short = 19 + val NotEnoughReplicasAfterAppendCode: Short = 20 - private val exceptionToCode = + private val exceptionToCode = Map[Class[Throwable], Short]( classOf[OffsetOutOfRangeException].asInstanceOf[Class[Throwable]] -> OffsetOutOfRangeCode, classOf[InvalidMessageException].asInstanceOf[Class[Throwable]] -> InvalidMessageCode, @@ -57,18 +63,27 @@ object ErrorMapping { classOf[ReplicaNotAvailableException].asInstanceOf[Class[Throwable]] -> ReplicaNotAvailableCode, classOf[MessageSizeTooLargeException].asInstanceOf[Class[Throwable]] -> MessageSizeTooLargeCode, classOf[ControllerMovedException].asInstanceOf[Class[Throwable]] -> StaleControllerEpochCode, - classOf[OffsetMetadataTooLargeException].asInstanceOf[Class[Throwable]] -> OffsetMetadataTooLargeCode + classOf[OffsetMetadataTooLargeException].asInstanceOf[Class[Throwable]] -> OffsetMetadataTooLargeCode, + classOf[OffsetsLoadInProgressException].asInstanceOf[Class[Throwable]] -> OffsetsLoadInProgressCode, + classOf[ConsumerCoordinatorNotAvailableException].asInstanceOf[Class[Throwable]] -> ConsumerCoordinatorNotAvailableCode, + classOf[NotCoordinatorForConsumerException].asInstanceOf[Class[Throwable]] -> NotCoordinatorForConsumerCode, + classOf[InvalidTopicException].asInstanceOf[Class[Throwable]] -> InvalidTopicCode, + classOf[MessageSetSizeTooLargeException].asInstanceOf[Class[Throwable]] -> MessageSetSizeTooLargeCode, + classOf[NotEnoughReplicasException].asInstanceOf[Class[Throwable]] -> NotEnoughReplicasCode, + classOf[NotEnoughReplicasAfterAppendException].asInstanceOf[Class[Throwable]] -> NotEnoughReplicasAfterAppendCode ).withDefaultValue(UnknownCode) - + /* invert the mapping */ - private val codeToException = + private val codeToException = (Map[Short, Class[Throwable]]() ++ exceptionToCode.iterator.map(p => (p._2, p._1))).withDefaultValue(classOf[UnknownException]) - + def codeFor(exception: Class[Throwable]): Short = exceptionToCode(exception) - + def maybeThrowException(code: Short) = if(code != 0) throw codeToException(code).newInstance() def exceptionFor(code: Short) : Throwable = codeToException(code).newInstance() + + def exceptionNameFor(code: Short) : String = codeToException(code).getName() } diff --git a/core/src/main/scala/kafka/common/MessageSetSizeTooLargeException.scala b/core/src/main/scala/kafka/common/MessageSetSizeTooLargeException.scala new file mode 100644 index 0000000000000..94a616ed3972a --- /dev/null +++ b/core/src/main/scala/kafka/common/MessageSetSizeTooLargeException.scala @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more 
+ * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.common + +class MessageSetSizeTooLargeException(message: String) extends RuntimeException(message) { + def this() = this(null) +} diff --git a/core/src/main/scala/kafka/common/MessageStreamsExistException.scala b/core/src/main/scala/kafka/common/MessageStreamsExistException.scala new file mode 100644 index 0000000000000..68a2e079ea5b8 --- /dev/null +++ b/core/src/main/scala/kafka/common/MessageStreamsExistException.scala @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ +package kafka.common + +/** + * Indicates that createMessageStreams can't be called more than once +*/ +class MessageStreamsExistException(message: String, t: Throwable) extends RuntimeException(message, t) { +} diff --git a/core/src/main/scala/kafka/common/NoReplicaOnlineException.scala b/core/src/main/scala/kafka/common/NoReplicaOnlineException.scala index a1e12794978ad..b66c8fc82c15b 100644 --- a/core/src/main/scala/kafka/common/NoReplicaOnlineException.scala +++ b/core/src/main/scala/kafka/common/NoReplicaOnlineException.scala @@ -20,7 +20,8 @@ package kafka.common /** * This exception is thrown by the leader elector in the controller when leader election fails for a partition since - * all the replicas for a partition are offline + * all the leader candidate replicas for a partition are offline; the set of candidates may or may not be limited + * to just the in sync replicas depending upon whether unclean leader election is allowed to occur.
*/ class NoReplicaOnlineException(message: String, cause: Throwable) extends RuntimeException(message, cause) { def this(message: String) = this(message, null) diff --git a/core/src/main/scala/kafka/common/NotCoordinatorForConsumerException.scala b/core/src/main/scala/kafka/common/NotCoordinatorForConsumerException.scala new file mode 100644 index 0000000000000..1eb74be038eaa --- /dev/null +++ b/core/src/main/scala/kafka/common/NotCoordinatorForConsumerException.scala @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.common + +class NotCoordinatorForConsumerException(message: String) extends RuntimeException(message) { + def this() = this(null) +} \ No newline at end of file diff --git a/core/src/main/scala/kafka/common/NotEnoughReplicasAfterAppendException.scala b/core/src/main/scala/kafka/common/NotEnoughReplicasAfterAppendException.scala new file mode 100644 index 0000000000000..c4f9def6162e9 --- /dev/null +++ b/core/src/main/scala/kafka/common/NotEnoughReplicasAfterAppendException.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.common + +/** + * Number of insync replicas for the partition is lower than min.insync.replicas + * This exception is raised when the low ISR size is discovered *after* the message + * was already appended to the log. Producer retries will cause duplicates. + */ +class NotEnoughReplicasAfterAppendException(message: String) extends RuntimeException(message) { + def this() = this(null) +} diff --git a/core/src/main/scala/kafka/common/NotEnoughReplicasException.scala b/core/src/main/scala/kafka/common/NotEnoughReplicasException.scala new file mode 100644 index 0000000000000..bfbe0ee4a5a15 --- /dev/null +++ b/core/src/main/scala/kafka/common/NotEnoughReplicasException.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.common + +/** + * Message was rejected because number of insync replicas for the partition is lower than min.insync.replicas + */ +class NotEnoughReplicasException(message: String) extends RuntimeException(message) { + def this() = this(null) +} diff --git a/core/src/main/scala/kafka/common/OffsetMetadataAndError.scala b/core/src/main/scala/kafka/common/OffsetMetadataAndError.scala index 59608a34202b4..4cabffeacea09 100644 --- a/core/src/main/scala/kafka/common/OffsetMetadataAndError.scala +++ b/core/src/main/scala/kafka/common/OffsetMetadataAndError.scala @@ -1,5 +1,3 @@ -package kafka.common - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -17,20 +15,42 @@ package kafka.common * limitations under the License. */ -/** - * Convenience case class since (topic, partition) pairs are ubiquitous. - */ -case class OffsetMetadataAndError(offset: Long, metadata: String = OffsetMetadataAndError.NoMetadata, error: Short = ErrorMapping.NoError) { +package kafka.common - def this(tuple: (Long, String, Short)) = this(tuple._1, tuple._2, tuple._3) +case class OffsetAndMetadata(offset: Long, + metadata: String = OffsetAndMetadata.NoMetadata, + timestamp: Long = -1L) { + override def toString = "OffsetAndMetadata[%d,%s%s]" + .format(offset, + if (metadata != null && metadata.length > 0) metadata else "NO_METADATA", + if (timestamp == -1) "" else "," + timestamp.toString) +} - def asTuple = (offset, metadata, error) +object OffsetAndMetadata { + val InvalidOffset: Long = -1L + val NoMetadata: String = "" + val InvalidTime: Long = -1L +} + +case class OffsetMetadataAndError(offset: Long, + metadata: String = OffsetAndMetadata.NoMetadata, + error: Short = ErrorMapping.NoError) { + + def this(offsetMetadata: OffsetAndMetadata, error: Short) = + this(offsetMetadata.offset, offsetMetadata.metadata, error) - override def toString = "OffsetAndMetadata[%d,%s,%d]".format(offset, metadata, error) + def this(error: Short) = + this(OffsetAndMetadata.InvalidOffset, OffsetAndMetadata.NoMetadata, error) + def asTuple = (offset, metadata, error) + + override def toString = "OffsetMetadataAndError[%d,%s,%d]".format(offset, metadata, error) } object OffsetMetadataAndError { - val InvalidOffset: Long = -1L; - val NoMetadata: String = ""; + val NoOffset = OffsetMetadataAndError(OffsetAndMetadata.InvalidOffset, OffsetAndMetadata.NoMetadata, ErrorMapping.NoError) + val OffsetsLoading = OffsetMetadataAndError(OffsetAndMetadata.InvalidOffset, OffsetAndMetadata.NoMetadata, ErrorMapping.OffsetsLoadInProgressCode) + val NotOffsetManagerForGroup = OffsetMetadataAndError(OffsetAndMetadata.InvalidOffset, OffsetAndMetadata.NoMetadata, ErrorMapping.NotCoordinatorForConsumerCode) + val UnknownTopicOrPartition = 
OffsetMetadataAndError(OffsetAndMetadata.InvalidOffset, OffsetAndMetadata.NoMetadata, ErrorMapping.UnknownTopicOrPartitionCode) } + diff --git a/core/src/main/scala/kafka/utils/Annotations_2.8.scala b/core/src/main/scala/kafka/common/OffsetsLoadInProgressException.scala similarity index 63% rename from core/src/main/scala/kafka/utils/Annotations_2.8.scala rename to core/src/main/scala/kafka/common/OffsetsLoadInProgressException.scala index 28269eb037109..1c8e96eefc7f0 100644 --- a/core/src/main/scala/kafka/utils/Annotations_2.8.scala +++ b/core/src/main/scala/kafka/common/OffsetsLoadInProgressException.scala @@ -5,7 +5,7 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -15,22 +15,12 @@ * limitations under the License. */ -package kafka.utils - -/* Some helpful annotations */ +package kafka.common /** - * Indicates that the annotated class is meant to be threadsafe. For an abstract class it is an part of the interface that an implementation - * must respect + * Indicates that offsets are currently being loaded from disk into the cache so offset fetch requests cannot be satisfied. */ -class threadsafe extends StaticAnnotation +class OffsetsLoadInProgressException(message: String) extends RuntimeException(message) { + def this() = this(null) +} -/** - * Indicates that the annotated class is not threadsafe - */ -class nonthreadsafe extends StaticAnnotation - -/** - * Indicates that the annotated class is immutable - */ -class immutable extends StaticAnnotation diff --git a/core/src/main/scala/kafka/common/Topic.scala b/core/src/main/scala/kafka/common/Topic.scala index c1b9f65114c77..ad759786d1c22 100644 --- a/core/src/main/scala/kafka/common/Topic.scala +++ b/core/src/main/scala/kafka/common/Topic.scala @@ -18,12 +18,16 @@ package kafka.common import util.matching.Regex +import kafka.server.OffsetManager + object Topic { val legalChars = "[a-zA-Z0-9\\._\\-]" private val maxNameLength = 255 private val rgx = new Regex(legalChars + "+") + val InternalTopics = Set(OffsetManager.OffsetsTopicName) + def validate(topic: String) { if (topic.length <= 0) throw new InvalidTopicException("topic name is illegal, can't be empty") diff --git a/core/src/main/scala/kafka/common/UnknownTopicOrPartitionException.scala b/core/src/main/scala/kafka/common/UnknownTopicOrPartitionException.scala index 781e551e5b78b..f382d16de9c39 100644 --- a/core/src/main/scala/kafka/common/UnknownTopicOrPartitionException.scala +++ b/core/src/main/scala/kafka/common/UnknownTopicOrPartitionException.scala @@ -17,7 +17,9 @@ package kafka.common /** - * Indicates an unknown topic or a partition id not between 0 and numPartitions-1 + * Indicates one of the following situation: + * 1. Producer does not have the partition metadata for this id upon sending messages + * 2. 
Broker does not have the specified partition by id upon receiving messages */ class UnknownTopicOrPartitionException(message: String) extends RuntimeException(message) { def this() = this(null) diff --git a/core/src/main/scala/kafka/consumer/ConsumerConfig.scala b/core/src/main/scala/kafka/consumer/ConsumerConfig.scala index e6875d6aa830e..9ebbee6c16dc8 100644 --- a/core/src/main/scala/kafka/consumer/ConsumerConfig.scala +++ b/core/src/main/scala/kafka/consumer/ConsumerConfig.scala @@ -41,9 +41,15 @@ object ConsumerConfig extends Config { val MirrorTopicsWhitelist = "" val MirrorTopicsBlacklist = "" val MirrorConsumerNumThreads = 1 + val OffsetsChannelBackoffMs = 1000 + val OffsetsChannelSocketTimeoutMs = 10000 + val OffsetsCommitMaxRetries = 5 + val OffsetsStorage = "zookeeper" val MirrorTopicsWhitelistProp = "mirror.topics.whitelist" val MirrorTopicsBlacklistProp = "mirror.topics.blacklist" + val ExcludeInternalTopics = true + val DefaultPartitionAssignmentStrategy = "range" /* select between "range", and "roundrobin" */ val MirrorConsumerNumThreadsProp = "mirror.consumer.numthreads" val DefaultClientId = "" @@ -51,6 +57,7 @@ object ConsumerConfig extends Config { validateClientId(config.clientId) validateGroupId(config.groupId) validateAutoOffsetReset(config.autoOffsetReset) + validateOffsetsStorage(config.offsetsStorage) } def validateClientId(clientId: String) { @@ -69,6 +76,15 @@ object ConsumerConfig extends Config { "Valid values are " + OffsetRequest.SmallestTimeString + " and " + OffsetRequest.LargestTimeString) } } + + def validateOffsetsStorage(storage: String) { + storage match { + case "zookeeper" => + case "kafka" => + case _ => throw new InvalidConfigException("Wrong value " + storage + " of offsets.storage in consumer config; " + + "Valid values are 'zookeeper' and 'kafka'") + } + } } class ConsumerConfig private (val props: VerifiableProperties) extends ZKConfig(props) { @@ -86,8 +102,10 @@ class ConsumerConfig private (val props: VerifiableProperties) extends ZKConfig( * Set this explicitly for only testing purpose. */ val consumerId: Option[String] = Option(props.getString("consumer.id", null)) - /** the socket timeout for network requests. The actual timeout set will be max.fetch.wait + socket.timeout.ms. */ + /** the socket timeout for network requests. Its value should be at least fetch.wait.max.ms. */ val socketTimeoutMs = props.getInt("socket.timeout.ms", SocketTimeout) + require(fetchWaitMaxMs <= socketTimeoutMs, "socket.timeout.ms should always be at least fetch.wait.max.ms" + + " to prevent unnecessary socket timeouts") /** the socket receive buffer for network requests */ val socketReceiveBufferBytes = props.getInt("socket.receive.buffer.bytes", SocketBufferSize) @@ -122,6 +140,27 @@ class ConsumerConfig private (val props: VerifiableProperties) extends ZKConfig( /** backoff time to refresh the leader of a partition after it loses the current leader */ val refreshLeaderBackoffMs = props.getInt("refresh.leader.backoff.ms", RefreshMetadataBackoffMs) + /** backoff time to reconnect the offsets channel or to retry offset fetches/commits */ + val offsetsChannelBackoffMs = props.getInt("offsets.channel.backoff.ms", OffsetsChannelBackoffMs) + /** socket timeout to use when reading responses for Offset Fetch/Commit requests. This timeout will also be used for + * the ConsumerMetadata requests that are used to query for the offset coordinator.
*/ + val offsetsChannelSocketTimeoutMs = props.getInt("offsets.channel.socket.timeout.ms", OffsetsChannelSocketTimeoutMs) + + /** Retry the offset commit up to this many times on failure. This retry count only applies to offset commits during + * shut-down. It does not apply to commits from the auto-commit thread. It also does not apply to attempts to query + * for the offset coordinator before committing offsets. i.e., if a consumer metadata request fails for any reason, + * it is retried and that retry does not count toward this limit. */ + val offsetsCommitMaxRetries = props.getInt("offsets.commit.max.retries", OffsetsCommitMaxRetries) + + /** Specify whether offsets should be committed to "zookeeper" (default) or "kafka" */ + val offsetsStorage = props.getString("offsets.storage", OffsetsStorage).toLowerCase + + /** If you are using "kafka" as offsets.storage, you can dual commit offsets to ZooKeeper (in addition to Kafka). This + * is required during migration from zookeeper-based offset storage to kafka-based offset storage. With respect to any + * given consumer group, it is safe to turn this off after all instances within that group have been migrated to + * the new jar that commits offsets to the broker (instead of directly to ZooKeeper). */ + val dualCommitEnabled = props.getBoolean("dual.commit.enabled", if (offsetsStorage == "kafka") true else false) + /* what to do if an offset is out of range. smallest : automatically reset the offset to the smallest offset largest : automatically reset the offset to the largest offset @@ -136,6 +175,12 @@ class ConsumerConfig private (val props: VerifiableProperties) extends ZKConfig( */ val clientId = props.getString("client.id", groupId) + /** Whether messages from internal topics (such as offsets) should be exposed to the consumer. */ + val excludeInternalTopics = props.getBoolean("exclude.internal.topics", ExcludeInternalTopics) + + /** Select a strategy for assigning partitions to consumer streams. Possible values: range, roundrobin */ + val partitionAssignmentStrategy = props.getString("partition.assignment.strategy", DefaultPartitionAssignmentStrategy) + validate(this) } diff --git a/core/src/main/scala/kafka/consumer/ConsumerConnector.scala b/core/src/main/scala/kafka/consumer/ConsumerConnector.scala index 13c3f771e9a96..62c0686e816d2 100644 --- a/core/src/main/scala/kafka/consumer/ConsumerConnector.scala +++ b/core/src/main/scala/kafka/consumer/ConsumerConnector.scala @@ -70,6 +70,11 @@ trait ConsumerConnector { /** * Commit the offsets of all broker partitions connected by this connector. */ + def commitOffsets(retryOnFailure: Boolean) + + /** + * KAFKA-1743: This method added for backward compatibility. 
+ */ def commitOffsets /** diff --git a/core/src/main/scala/kafka/consumer/ConsumerFetcherThread.scala b/core/src/main/scala/kafka/consumer/ConsumerFetcherThread.scala index f8c1b4e674f75..ee6139c901082 100644 --- a/core/src/main/scala/kafka/consumer/ConsumerFetcherThread.scala +++ b/core/src/main/scala/kafka/consumer/ConsumerFetcherThread.scala @@ -30,7 +30,7 @@ class ConsumerFetcherThread(name: String, partitionMap: Map[TopicAndPartition, PartitionTopicInfo], val consumerFetcherManager: ConsumerFetcherManager) extends AbstractFetcherThread(name = name, - clientId = config.clientId + "-" + name, + clientId = config.clientId, sourceBroker = sourceBroker, socketTimeout = config.socketTimeoutMs, socketBufferSize = config.socketReceiveBufferBytes, diff --git a/core/src/main/scala/kafka/consumer/ConsumerIterator.scala b/core/src/main/scala/kafka/consumer/ConsumerIterator.scala index ac491b4da2583..78fbf75651583 100644 --- a/core/src/main/scala/kafka/consumer/ConsumerIterator.scala +++ b/core/src/main/scala/kafka/consumer/ConsumerIterator.scala @@ -71,7 +71,6 @@ class ConsumerIterator[K, V](private val channel: BlockingQueue[FetchedDataChunk } if(currentDataChunk eq ZookeeperConsumerConnector.shutdownCommand) { debug("Received the shutdown command") - channel.offer(currentDataChunk) return allDone } else { currentTopicInfo = currentDataChunk.topicInfo diff --git a/core/src/main/scala/kafka/consumer/ConsumerTopicStats.scala b/core/src/main/scala/kafka/consumer/ConsumerTopicStats.scala index ff5f470f7aa30..01797ff766a7f 100644 --- a/core/src/main/scala/kafka/consumer/ConsumerTopicStats.scala +++ b/core/src/main/scala/kafka/consumer/ConsumerTopicStats.scala @@ -20,12 +20,17 @@ package kafka.consumer import kafka.utils.{Pool, threadsafe, Logging} import java.util.concurrent.TimeUnit import kafka.metrics.KafkaMetricsGroup -import kafka.common.ClientIdAndTopic +import kafka.common.{ClientIdTopic, ClientIdAllTopics, ClientIdAndTopic} @threadsafe -class ConsumerTopicMetrics(metricId: ClientIdAndTopic) extends KafkaMetricsGroup { - val messageRate = newMeter(metricId + "MessagesPerSec", "messages", TimeUnit.SECONDS) - val byteRate = newMeter(metricId + "BytesPerSec", "bytes", TimeUnit.SECONDS) +class ConsumerTopicMetrics(metricId: ClientIdTopic) extends KafkaMetricsGroup { + val tags = metricId match { + case ClientIdAndTopic(clientId, topic) => Map("clientId" -> clientId, "topic" -> topic) + case ClientIdAllTopics(clientId) => Map("clientId" -> clientId) + } + + val messageRate = newMeter("MessagesPerSec", "messages", TimeUnit.SECONDS, tags) + val byteRate = newMeter("BytesPerSec", "bytes", TimeUnit.SECONDS, tags) } /** @@ -35,12 +40,12 @@ class ConsumerTopicMetrics(metricId: ClientIdAndTopic) extends KafkaMetricsGroup class ConsumerTopicStats(clientId: String) extends Logging { private val valueFactory = (k: ClientIdAndTopic) => new ConsumerTopicMetrics(k) private val stats = new Pool[ClientIdAndTopic, ConsumerTopicMetrics](Some(valueFactory)) - private val allTopicStats = new ConsumerTopicMetrics(new ClientIdAndTopic(clientId, "AllTopics")) // to differentiate from a topic named AllTopics + private val allTopicStats = new ConsumerTopicMetrics(new ClientIdAllTopics(clientId)) // to differentiate from a topic named AllTopics def getConsumerAllTopicStats(): ConsumerTopicMetrics = allTopicStats def getConsumerTopicStats(topic: String): ConsumerTopicMetrics = { - stats.getAndMaybePut(new ClientIdAndTopic(clientId, topic + "-")) + stats.getAndMaybePut(new ClientIdAndTopic(clientId, topic)) } } @@ -54,4 
+59,8 @@ object ConsumerTopicStatsRegistry { def getConsumerTopicStat(clientId: String) = { globalStats.getAndMaybePut(clientId) } + + def removeConsumerTopicStat(clientId: String) { + globalStats.remove(clientId) + } } \ No newline at end of file diff --git a/core/src/main/scala/kafka/consumer/FetchRequestAndResponseStats.scala b/core/src/main/scala/kafka/consumer/FetchRequestAndResponseStats.scala index 875eeeb73cba5..3df55e13001ce 100644 --- a/core/src/main/scala/kafka/consumer/FetchRequestAndResponseStats.scala +++ b/core/src/main/scala/kafka/consumer/FetchRequestAndResponseStats.scala @@ -17,14 +17,23 @@ package kafka.consumer -import kafka.metrics.{KafkaTimer, KafkaMetricsGroup} -import kafka.utils.Pool import java.util.concurrent.TimeUnit -import kafka.common.ClientIdAndBroker -class FetchRequestAndResponseMetrics(metricId: ClientIdAndBroker) extends KafkaMetricsGroup { - val requestTimer = new KafkaTimer(newTimer(metricId + "FetchRequestRateAndTimeMs", TimeUnit.MILLISECONDS, TimeUnit.SECONDS)) - val requestSizeHist = newHistogram(metricId + "FetchResponseSize") +import kafka.common.{ClientIdAllBrokers, ClientIdBroker, ClientIdAndBroker} +import kafka.metrics.{KafkaMetricsGroup, KafkaTimer} +import kafka.utils.Pool + +class FetchRequestAndResponseMetrics(metricId: ClientIdBroker) extends KafkaMetricsGroup { + val tags = metricId match { + case ClientIdAndBroker(clientId, brokerHost, brokerPort) => + Map("clientId" -> clientId, "brokerHost" -> brokerHost, + "brokerPort" -> brokerPort.toString) + case ClientIdAllBrokers(clientId) => + Map("clientId" -> clientId) + } + + val requestTimer = new KafkaTimer(newTimer("FetchRequestRateAndTimeMs", TimeUnit.MILLISECONDS, TimeUnit.SECONDS, tags)) + val requestSizeHist = newHistogram("FetchResponseSize", biased = true, tags) } /** @@ -32,14 +41,14 @@ class FetchRequestAndResponseMetrics(metricId: ClientIdAndBroker) extends KafkaM * @param clientId ClientId of the given consumer */ class FetchRequestAndResponseStats(clientId: String) { - private val valueFactory = (k: ClientIdAndBroker) => new FetchRequestAndResponseMetrics(k) - private val stats = new Pool[ClientIdAndBroker, FetchRequestAndResponseMetrics](Some(valueFactory)) - private val allBrokersStats = new FetchRequestAndResponseMetrics(new ClientIdAndBroker(clientId, "AllBrokers")) + private val valueFactory = (k: ClientIdBroker) => new FetchRequestAndResponseMetrics(k) + private val stats = new Pool[ClientIdBroker, FetchRequestAndResponseMetrics](Some(valueFactory)) + private val allBrokersStats = new FetchRequestAndResponseMetrics(new ClientIdAllBrokers(clientId)) def getFetchRequestAndResponseAllBrokersStats(): FetchRequestAndResponseMetrics = allBrokersStats - def getFetchRequestAndResponseStats(brokerInfo: String): FetchRequestAndResponseMetrics = { - stats.getAndMaybePut(new ClientIdAndBroker(clientId, brokerInfo + "-")) + def getFetchRequestAndResponseStats(brokerHost: String, brokerPort: Int): FetchRequestAndResponseMetrics = { + stats.getAndMaybePut(new ClientIdAndBroker(clientId, brokerHost, brokerPort)) } } @@ -53,6 +62,17 @@ object FetchRequestAndResponseStatsRegistry { def getFetchRequestAndResponseStats(clientId: String) = { globalStats.getAndMaybePut(clientId) } + + def removeConsumerFetchRequestAndResponseStats(clientId: String) { + val pattern = (".*" + clientId + ".*").r + val keys = globalStats.keys + for (key <- keys) { + pattern.findFirstIn(key) match { + case Some(_) => globalStats.remove(key) + case _ => + } + } + } } diff --git 
a/core/src/main/scala/kafka/consumer/KafkaStream.scala b/core/src/main/scala/kafka/consumer/KafkaStream.scala index 31eaf866e3c84..805e91677034e 100644 --- a/core/src/main/scala/kafka/consumer/KafkaStream.scala +++ b/core/src/main/scala/kafka/consumer/KafkaStream.scala @@ -45,4 +45,7 @@ class KafkaStream[K,V](private val queue: BlockingQueue[FetchedDataChunk], iter.clearCurrentChunk() } + override def toString(): String = { + "%s kafka stream".format(clientId) + } } diff --git a/core/src/main/scala/kafka/consumer/PartitionAssignor.scala b/core/src/main/scala/kafka/consumer/PartitionAssignor.scala new file mode 100644 index 0000000000000..e6ff7683a0df4 --- /dev/null +++ b/core/src/main/scala/kafka/consumer/PartitionAssignor.scala @@ -0,0 +1,163 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.consumer + +import org.I0Itec.zkclient.ZkClient +import kafka.common.TopicAndPartition +import kafka.utils.{Utils, ZkUtils, Logging} + +trait PartitionAssignor { + + /** + * Assigns partitions to consumer instances in a group. + * @return An assignment map of partition to consumer thread. This only includes assignments for threads that belong + * to the given assignment-context's consumer. + */ + def assign(ctx: AssignmentContext): scala.collection.Map[TopicAndPartition, ConsumerThreadId] + +} + +object PartitionAssignor { + def createInstance(assignmentStrategy: String) = assignmentStrategy match { + case "roundrobin" => new RoundRobinAssignor() + case _ => new RangeAssignor() + } +} + +class AssignmentContext(group: String, val consumerId: String, excludeInternalTopics: Boolean, zkClient: ZkClient) { + val myTopicThreadIds: collection.Map[String, collection.Set[ConsumerThreadId]] = { + val myTopicCount = TopicCount.constructTopicCount(group, consumerId, zkClient, excludeInternalTopics) + myTopicCount.getConsumerThreadIdsPerTopic + } + + val partitionsForTopic: collection.Map[String, Seq[Int]] = + ZkUtils.getPartitionsForTopics(zkClient, myTopicThreadIds.keySet.toSeq) + + val consumersForTopic: collection.Map[String, List[ConsumerThreadId]] = + ZkUtils.getConsumersPerTopic(zkClient, group, excludeInternalTopics) + + val consumers: Seq[String] = ZkUtils.getConsumersInGroup(zkClient, group).sorted +} + +/** + * The round-robin partition assignor lays out all the available partitions and all the available consumer threads. It + * then proceeds to do a round-robin assignment from partition to consumer thread. If the subscriptions of all consumer + * instances are identical, then the partitions will be uniformly distributed. (i.e., the partition ownership counts + * will be within a delta of exactly one across all consumer threads.) 
+ * + * (For simplicity of implementation) the assignor is allowed to assign a given topic-partition to any consumer instance + * and thread-id within that instance. Therefore, round-robin assignment is allowed only if: + * a) Every topic has the same number of streams within a consumer instance + * b) The set of subscribed topics is identical for every consumer instance within the group. + */ + +class RoundRobinAssignor() extends PartitionAssignor with Logging { + + def assign(ctx: AssignmentContext) = { + val partitionOwnershipDecision = collection.mutable.Map[TopicAndPartition, ConsumerThreadId]() + + if (ctx.consumersForTopic.size > 0) { + // check conditions (a) and (b) + val (headTopic, headThreadIdSet) = (ctx.consumersForTopic.head._1, ctx.consumersForTopic.head._2.toSet) + ctx.consumersForTopic.foreach { case (topic, threadIds) => + val threadIdSet = threadIds.toSet + require(threadIdSet == headThreadIdSet, + "Round-robin assignment is allowed only if all consumers in the group subscribe to the same topics, " + + "AND if the stream counts across topics are identical for a given consumer instance.\n" + + "Topic %s has the following available consumer streams: %s\n".format(topic, threadIdSet) + + "Topic %s has the following available consumer streams: %s\n".format(headTopic, headThreadIdSet)) + } + + val threadAssignor = Utils.circularIterator(headThreadIdSet.toSeq.sorted) + + info("Starting round-robin assignment with consumers " + ctx.consumers) + val allTopicPartitions = ctx.partitionsForTopic.flatMap { case (topic, partitions) => + info("Consumer %s rebalancing the following partitions for topic %s: %s" + .format(ctx.consumerId, topic, partitions)) + partitions.map(partition => { + TopicAndPartition(topic, partition) + }) + }.toSeq.sortWith((topicPartition1, topicPartition2) => { + /* + * Randomize the order by taking the hashcode to reduce the likelihood of all partitions of a given topic ending + * up on one consumer (if it has a high enough stream count). + */ + topicPartition1.toString.hashCode < topicPartition2.toString.hashCode + }) + + allTopicPartitions.foreach(topicPartition => { + val threadId = threadAssignor.next() + if (threadId.consumer == ctx.consumerId) + partitionOwnershipDecision += (topicPartition -> threadId) + }) + } + + partitionOwnershipDecision + } +} + +/** + * Range partitioning works on a per-topic basis. For each topic, we lay out the available partitions in numeric order + * and the consumer threads in lexicographic order. We then divide the number of partitions by the total number of + * consumer streams (threads) to determine the number of partitions to assign to each consumer. If it does not evenly + * divide, then the first few consumers will have one extra partition. For example, suppose there are two consumers C1 + * and C2 with two streams each, and there are five available partitions (p0, p1, p2, p3, p4). So each consumer thread + * will get at least one partition and the first consumer thread will get one extra partition. 
So the assignment will be: + * p0 -> C1-0, p1 -> C1-0, p2 -> C1-1, p3 -> C2-0, p4 -> C2-1 + */ +class RangeAssignor() extends PartitionAssignor with Logging { + + def assign(ctx: AssignmentContext) = { + val partitionOwnershipDecision = collection.mutable.Map[TopicAndPartition, ConsumerThreadId]() + + for ((topic, consumerThreadIdSet) <- ctx.myTopicThreadIds) { + val curConsumers = ctx.consumersForTopic(topic) + val curPartitions: Seq[Int] = ctx.partitionsForTopic(topic) + + val nPartsPerConsumer = curPartitions.size / curConsumers.size + val nConsumersWithExtraPart = curPartitions.size % curConsumers.size + + info("Consumer " + ctx.consumerId + " rebalancing the following partitions: " + curPartitions + + " for topic " + topic + " with consumers: " + curConsumers) + + for (consumerThreadId <- consumerThreadIdSet) { + val myConsumerPosition = curConsumers.indexOf(consumerThreadId) + assert(myConsumerPosition >= 0) + val startPart = nPartsPerConsumer * myConsumerPosition + myConsumerPosition.min(nConsumersWithExtraPart) + val nParts = nPartsPerConsumer + (if (myConsumerPosition + 1 > nConsumersWithExtraPart) 0 else 1) + + /** + * Range-partition the sorted partitions to consumers for better locality. + * The first few consumers pick up an extra partition, if any. + */ + if (nParts <= 0) + warn("No broker partitions consumed by consumer thread " + consumerThreadId + " for topic " + topic) + else { + for (i <- startPart until startPart + nParts) { + val partition = curPartitions(i) + info(consumerThreadId + " attempting to claim partition " + partition) + // record the partition ownership decision + partitionOwnershipDecision += (TopicAndPartition(topic, partition) -> consumerThreadId) + } + } + } + } + + partitionOwnershipDecision + } +} diff --git a/core/src/main/scala/kafka/consumer/SimpleConsumer.scala b/core/src/main/scala/kafka/consumer/SimpleConsumer.scala index 6dae149adfd4b..e53ee51638b93 100644 --- a/core/src/main/scala/kafka/consumer/SimpleConsumer.scala +++ b/core/src/main/scala/kafka/consumer/SimpleConsumer.scala @@ -21,6 +21,7 @@ import kafka.api._ import kafka.network._ import kafka.utils._ import kafka.common.{ErrorMapping, TopicAndPartition} +import org.apache.kafka.common.utils.Utils._ /** * A consumer of kafka messages @@ -35,7 +36,6 @@ class SimpleConsumer(val host: String, ConsumerConfig.validateClientId(clientId) private val lock = new Object() private val blockingChannel = new BlockingChannel(host, port, bufferSize, BlockingChannel.UseDefaultBufferSize, soTimeout) - val brokerInfo = "host_%s-port_%s".format(host, port) private val fetchRequestAndResponseStats = FetchRequestAndResponseStatsRegistry.getFetchRequestAndResponseStats(clientId) private var isClosed = false @@ -46,10 +46,8 @@ class SimpleConsumer(val host: String, } private def disconnect() = { - if(blockingChannel.isConnected) { - debug("Disconnecting from " + host + ":" + port) - blockingChannel.disconnect() - } + debug("Disconnecting from " + formatAddress(host, port)) + blockingChannel.disconnect() } private def reconnect() { @@ -66,14 +64,14 @@ class SimpleConsumer(val host: String, private def sendRequest(request: RequestOrResponse): Receive = { lock synchronized { - getOrMakeConnection() var response: Receive = null try { + getOrMakeConnection() blockingChannel.send(request) response = blockingChannel.receive() } catch { case e : Throwable => - warn("Reconnect due to socket error: %s".format(e.getMessage)) + info("Reconnect due to socket error: %s".format(e.toString)) // retry once try { reconnect() @@ 
-94,6 +92,11 @@ class SimpleConsumer(val host: String, TopicMetadataResponse.readFrom(response.buffer) } + def send(request: ConsumerMetadataRequest): ConsumerMetadataResponse = { + val response = sendRequest(request) + ConsumerMetadataResponse.readFrom(response.buffer) + } + /** * Fetch a set of messages from a topic. * @@ -102,7 +105,7 @@ class SimpleConsumer(val host: String, */ def fetch(request: FetchRequest): FetchResponse = { var response: Receive = null - val specificTimer = fetchRequestAndResponseStats.getFetchRequestAndResponseStats(brokerInfo).requestTimer + val specificTimer = fetchRequestAndResponseStats.getFetchRequestAndResponseStats(host, port).requestTimer val aggregateTimer = fetchRequestAndResponseStats.getFetchRequestAndResponseAllBrokersStats.requestTimer aggregateTimer.time { specificTimer.time { @@ -111,7 +114,7 @@ class SimpleConsumer(val host: String, } val fetchResponse = FetchResponse.readFrom(response.buffer) val fetchedSize = fetchResponse.sizeInBytes - fetchRequestAndResponseStats.getFetchRequestAndResponseStats(brokerInfo).requestSizeHist.update(fetchedSize) + fetchRequestAndResponseStats.getFetchRequestAndResponseStats(host, port).requestSizeHist.update(fetchedSize) fetchRequestAndResponseStats.getFetchRequestAndResponseAllBrokersStats.requestSizeHist.update(fetchedSize) fetchResponse } @@ -128,7 +131,11 @@ class SimpleConsumer(val host: String, * @param request a [[kafka.api.OffsetCommitRequest]] object. * @return a [[kafka.api.OffsetCommitResponse]] object. */ - def commitOffsets(request: OffsetCommitRequest) = OffsetCommitResponse.readFrom(sendRequest(request).buffer) + def commitOffsets(request: OffsetCommitRequest) = { + // TODO: With KAFKA-1012, we have to first issue a ConsumerMetadataRequest and connect to the coordinator before + // we can commit offsets. 
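// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: the range-assignment arithmetic
// used by RangeAssignor above, reduced to standalone Scala. Consumer thread
// ids are modelled as plain strings instead of ConsumerThreadId, and the
// example reproduces the scenario from the class comment (partitions p0-p4,
// sorted threads C1-0, C1-1, C2-0, C2-1).
// ---------------------------------------------------------------------------
object RangeAssignmentSketch {
  def assign(partitions: Seq[Int], sortedThreadIds: Seq[String]): Map[Int, String] = {
    val nPartsPerConsumer = partitions.size / sortedThreadIds.size
    val nConsumersWithExtraPart = partitions.size % sortedThreadIds.size
    sortedThreadIds.zipWithIndex.flatMap { case (threadId, position) =>
      // the first nConsumersWithExtraPart threads each pick up one extra partition
      val startPart = nPartsPerConsumer * position + position.min(nConsumersWithExtraPart)
      val nParts = nPartsPerConsumer + (if (position + 1 > nConsumersWithExtraPart) 0 else 1)
      partitions.slice(startPart, startPart + nParts).map(_ -> threadId)
    }.toMap
  }

  def main(args: Array[String]): Unit = {
    val decision = assign(Seq(0, 1, 2, 3, 4), Seq("C1-0", "C1-1", "C2-0", "C2-1"))
    // prints p0 -> C1-0, p1 -> C1-0, p2 -> C1-1, p3 -> C2-0, p4 -> C2-1
    decision.toSeq.sortBy(_._1).foreach { case (p, t) => println("p" + p + " -> " + t) }
  }
}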
+ OffsetCommitResponse.readFrom(sendRequest(request).buffer) + } /** * Fetch offsets for a topic diff --git a/core/src/main/scala/kafka/consumer/TopicCount.scala b/core/src/main/scala/kafka/consumer/TopicCount.scala index e33263378489f..0954b3c3ff8b3 100644 --- a/core/src/main/scala/kafka/consumer/TopicCount.scala +++ b/core/src/main/scala/kafka/consumer/TopicCount.scala @@ -19,35 +19,44 @@ package kafka.consumer import scala.collection._ import org.I0Itec.zkclient.ZkClient -import kafka.utils.{Json, ZKGroupDirs, ZkUtils, Logging} +import kafka.utils.{Json, ZKGroupDirs, ZkUtils, Logging, Utils} import kafka.common.KafkaException private[kafka] trait TopicCount { - def getConsumerThreadIdsPerTopic: Map[String, Set[String]] + def getConsumerThreadIdsPerTopic: Map[String, Set[ConsumerThreadId]] def getTopicCountMap: Map[String, Int] def pattern: String - - protected def makeConsumerThreadIdsPerTopic(consumerIdString: String, - topicCountMap: Map[String, Int]) = { - val consumerThreadIdsPerTopicMap = new mutable.HashMap[String, Set[String]]() + +} + +case class ConsumerThreadId(consumer: String, threadId: Int) extends Ordered[ConsumerThreadId] { + override def toString = "%s-%d".format(consumer, threadId) + + def compare(that: ConsumerThreadId) = toString.compare(that.toString) +} + +private[kafka] object TopicCount extends Logging { + val whiteListPattern = "white_list" + val blackListPattern = "black_list" + val staticPattern = "static" + + def makeThreadId(consumerIdString: String, threadId: Int) = consumerIdString + "-" + threadId + + def makeConsumerThreadIdsPerTopic(consumerIdString: String, + topicCountMap: Map[String, Int]) = { + val consumerThreadIdsPerTopicMap = new mutable.HashMap[String, Set[ConsumerThreadId]]() for ((topic, nConsumers) <- topicCountMap) { - val consumerSet = new mutable.HashSet[String] + val consumerSet = new mutable.HashSet[ConsumerThreadId] assert(nConsumers >= 1) for (i <- 0 until nConsumers) - consumerSet += consumerIdString + "-" + i + consumerSet += ConsumerThreadId(consumerIdString, i) consumerThreadIdsPerTopicMap.put(topic, consumerSet) } consumerThreadIdsPerTopicMap } -} - -private[kafka] object TopicCount extends Logging { - val whiteListPattern = "white_list" - val blackListPattern = "black_list" - val staticPattern = "static" - def constructTopicCount(group: String, consumerId: String, zkClient: ZkClient) : TopicCount = { + def constructTopicCount(group: String, consumerId: String, zkClient: ZkClient, excludeInternalTopics: Boolean) : TopicCount = { val dirs = new ZKGroupDirs(group) val topicCountString = ZkUtils.readData(zkClient, dirs.consumerRegistryDir + "/" + consumerId)._1 var subscriptionPattern: String = null @@ -85,15 +94,15 @@ private[kafka] object TopicCount extends Logging { new Whitelist(regex) else new Blacklist(regex) - new WildcardTopicCount(zkClient, consumerId, filter, numStreams) + new WildcardTopicCount(zkClient, consumerId, filter, numStreams, excludeInternalTopics) } } def constructTopicCount(consumerIdString: String, topicCount: Map[String, Int]) = new StaticTopicCount(consumerIdString, topicCount) - def constructTopicCount(consumerIdString: String, filter: TopicFilter, numStreams: Int, zkClient: ZkClient) = - new WildcardTopicCount(zkClient, consumerIdString, filter, numStreams) + def constructTopicCount(consumerIdString: String, filter: TopicFilter, numStreams: Int, zkClient: ZkClient, excludeInternalTopics: Boolean) = + new WildcardTopicCount(zkClient, consumerIdString, filter, numStreams, excludeInternalTopics) } @@ -101,7 
+110,7 @@ private[kafka] class StaticTopicCount(val consumerIdString: String, val topicCountMap: Map[String, Int]) extends TopicCount { - def getConsumerThreadIdsPerTopic = makeConsumerThreadIdsPerTopic(consumerIdString, topicCountMap) + def getConsumerThreadIdsPerTopic = TopicCount.makeConsumerThreadIdsPerTopic(consumerIdString, topicCountMap) override def equals(obj: Any): Boolean = { obj match { @@ -119,13 +128,15 @@ private[kafka] class StaticTopicCount(val consumerIdString: String, private[kafka] class WildcardTopicCount(zkClient: ZkClient, consumerIdString: String, topicFilter: TopicFilter, - numStreams: Int) extends TopicCount { + numStreams: Int, + excludeInternalTopics: Boolean) extends TopicCount { def getConsumerThreadIdsPerTopic = { - val wildcardTopics = ZkUtils.getChildrenParentMayNotExist(zkClient, ZkUtils.BrokerTopicsPath).filter(topicFilter.isTopicAllowed(_)) - makeConsumerThreadIdsPerTopic(consumerIdString, Map(wildcardTopics.map((_, numStreams)): _*)) + val wildcardTopics = ZkUtils.getChildrenParentMayNotExist(zkClient, ZkUtils.BrokerTopicsPath) + .filter(topic => topicFilter.isTopicAllowed(topic, excludeInternalTopics)) + TopicCount.makeConsumerThreadIdsPerTopic(consumerIdString, Map(wildcardTopics.map((_, numStreams)): _*)) } - def getTopicCountMap = Map(topicFilter.regex -> numStreams) + def getTopicCountMap = Map(Utils.JSONEscapeString(topicFilter.regex) -> numStreams) def pattern: String = { topicFilter match { diff --git a/core/src/main/scala/kafka/consumer/TopicFilter.scala b/core/src/main/scala/kafka/consumer/TopicFilter.scala index 4f2082360cfbb..5a13540699be4 100644 --- a/core/src/main/scala/kafka/consumer/TopicFilter.scala +++ b/core/src/main/scala/kafka/consumer/TopicFilter.scala @@ -20,6 +20,7 @@ package kafka.consumer import kafka.utils.Logging import java.util.regex.{PatternSyntaxException, Pattern} +import kafka.common.Topic sealed abstract class TopicFilter(rawRegex: String) extends Logging { @@ -41,12 +42,12 @@ sealed abstract class TopicFilter(rawRegex: String) extends Logging { override def toString = regex - def isTopicAllowed(topic: String): Boolean + def isTopicAllowed(topic: String, excludeInternalTopics: Boolean): Boolean } case class Whitelist(rawRegex: String) extends TopicFilter(rawRegex) { - override def isTopicAllowed(topic: String) = { - val allowed = topic.matches(regex) + override def isTopicAllowed(topic: String, excludeInternalTopics: Boolean) = { + val allowed = topic.matches(regex) && !(Topic.InternalTopics.contains(topic) && excludeInternalTopics) debug("%s %s".format( topic, if (allowed) "allowed" else "filtered")) @@ -58,8 +59,8 @@ case class Whitelist(rawRegex: String) extends TopicFilter(rawRegex) { } case class Blacklist(rawRegex: String) extends TopicFilter(rawRegex) { - override def isTopicAllowed(topic: String) = { - val allowed = !topic.matches(regex) + override def isTopicAllowed(topic: String, excludeInternalTopics: Boolean) = { + val allowed = (!topic.matches(regex)) && !(Topic.InternalTopics.contains(topic) && excludeInternalTopics) debug("%s %s".format( topic, if (allowed) "allowed" else "filtered")) diff --git a/core/src/main/scala/kafka/consumer/ZookeeperConsumerConnector.scala b/core/src/main/scala/kafka/consumer/ZookeeperConsumerConnector.scala index 703b2e22605ca..e991d2187d032 100644 --- a/core/src/main/scala/kafka/consumer/ZookeeperConsumerConnector.scala +++ b/core/src/main/scala/kafka/consumer/ZookeeperConsumerConnector.scala @@ -17,24 +17,30 @@ package kafka.consumer +import java.net.InetAddress +import 
java.util.UUID import java.util.concurrent._ import java.util.concurrent.atomic._ -import locks.ReentrantLock -import collection._ +import java.util.concurrent.locks.ReentrantLock + +import com.yammer.metrics.core.Gauge +import kafka.api._ +import kafka.client.ClientUtils import kafka.cluster._ +import kafka.common._ +import kafka.javaapi.consumer.ConsumerRebalanceListener +import kafka.metrics._ +import kafka.network.BlockingChannel +import kafka.serializer._ +import kafka.utils.Utils.inLock +import kafka.utils.ZkUtils._ import kafka.utils._ import org.I0Itec.zkclient.exception.ZkNodeExistsException -import java.net.InetAddress -import org.I0Itec.zkclient.{IZkDataListener, IZkStateListener, IZkChildListener, ZkClient} +import org.I0Itec.zkclient.{IZkChildListener, IZkDataListener, IZkStateListener, ZkClient} import org.apache.zookeeper.Watcher.Event.KeeperState -import java.util.UUID -import kafka.serializer._ -import kafka.utils.ZkUtils._ -import kafka.utils.Utils.inLock -import kafka.common._ -import com.yammer.metrics.core.Gauge -import kafka.metrics._ -import scala.Some + +import scala.collection._ +import scala.collection.JavaConversions._ /** @@ -42,7 +48,7 @@ import scala.Some * * Directories: * 1. Consumer id registry: - * /consumers/[group_id]/ids[consumer_id] -> topic1,...topicN + * /consumers/[group_id]/ids/[consumer_id] -> topic1,...topicN * A consumer has a unique consumer id within a consumer group. A consumer registers its id as an ephemeral znode * and puts all topics that it subscribes to as the value of the znode. The znode is deleted when the client is gone. * A consumer subscribes to event changes of the consumer id registry within its group. @@ -85,8 +91,8 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, private var fetcher: Option[ConsumerFetcherManager] = None private var zkClient: ZkClient = null private var topicRegistry = new Pool[String, Pool[Int, PartitionTopicInfo]] - private var checkpointedOffsets = new Pool[TopicAndPartition, Long] - private val topicThreadIdAndQueues = new Pool[(String,String), BlockingQueue[FetchedDataChunk]] + private val checkpointedZkOffsets = new Pool[TopicAndPartition, Long] + private val topicThreadIdAndQueues = new Pool[(String, ConsumerThreadId), BlockingQueue[FetchedDataChunk]] private val scheduler = new KafkaScheduler(threads = 1, threadNamePrefix = "kafka-consumer-scheduler-") private val messageStreamCreated = new AtomicBoolean(false) @@ -94,7 +100,16 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, private var topicPartitionChangeListener: ZKTopicPartitionChangeListener = null private var loadBalancerListener: ZKRebalancerListener = null + private var offsetsChannel: BlockingChannel = null + private val offsetsChannelLock = new Object + private var wildcardTopicWatcher: ZookeeperTopicEventWatcher = null + private var consumerRebalanceListener: ConsumerRebalanceListener = null + + // useful for tracking migration of consumers to store offsets in kafka + private val kafkaCommitMeter = newMeter("KafkaCommitsPerSec", "commits", TimeUnit.SECONDS, Map("clientId" -> config.clientId)) + private val zkCommitMeter = newMeter("ZooKeeperCommitsPerSec", "commits", TimeUnit.SECONDS, Map("clientId" -> config.clientId)) + private val rebalanceTimer = new KafkaTimer(newTimer("RebalanceRateAndTime", TimeUnit.MILLISECONDS, TimeUnit.SECONDS, Map("clientId" -> config.clientId))) val consumerIdString = { var consumerUuid : String = null @@ -113,39 +128,49 @@ private[kafka] class 
ZookeeperConsumerConnector(val config: ConsumerConfig, connectZk() createFetcher() + ensureOffsetManagerConnected() + if (config.autoCommitEnable) { scheduler.startup info("starting auto committer every " + config.autoCommitIntervalMs + " ms") - scheduler.schedule("kafka-consumer-autocommit", - autoCommit, + scheduler.schedule("kafka-consumer-autocommit", + autoCommit, delay = config.autoCommitIntervalMs, - period = config.autoCommitIntervalMs, + period = config.autoCommitIntervalMs, unit = TimeUnit.MILLISECONDS) } KafkaMetricsReporter.startReporters(config.props) + AppInfo.registerInfo() def this(config: ConsumerConfig) = this(config, true) - - def createMessageStreams(topicCountMap: Map[String,Int]): Map[String, List[KafkaStream[Array[Byte],Array[Byte]]]] = + + def createMessageStreams(topicCountMap: Map[String,Int]): Map[String, List[KafkaStream[Array[Byte],Array[Byte]]]] = createMessageStreams(topicCountMap, new DefaultDecoder(), new DefaultDecoder()) def createMessageStreams[K,V](topicCountMap: Map[String,Int], keyDecoder: Decoder[K], valueDecoder: Decoder[V]) : Map[String, List[KafkaStream[K,V]]] = { if (messageStreamCreated.getAndSet(true)) - throw new RuntimeException(this.getClass.getSimpleName + - " can create message streams at most once") + throw new MessageStreamsExistException(this.getClass.getSimpleName + + " can create message streams at most once",null) consume(topicCountMap, keyDecoder, valueDecoder) } - def createMessageStreamsByFilter[K,V](topicFilter: TopicFilter, - numStreams: Int, - keyDecoder: Decoder[K] = new DefaultDecoder(), + def createMessageStreamsByFilter[K,V](topicFilter: TopicFilter, + numStreams: Int, + keyDecoder: Decoder[K] = new DefaultDecoder(), valueDecoder: Decoder[V] = new DefaultDecoder()) = { val wildcardStreamsHandler = new WildcardStreamsHandler[K,V](topicFilter, numStreams, keyDecoder, valueDecoder) wildcardStreamsHandler.streams } + def setConsumerRebalanceListener(listener: ConsumerRebalanceListener) { + if (messageStreamCreated.get()) + throw new MessageStreamsExistException(this.getClass.getSimpleName + + " can only set consumer rebalance listener before creating streams",null) + consumerRebalanceListener = listener + } + private def createFetcher() { if (enableFetcher) fetcher = Some(new ConsumerFetcherManager(consumerIdString, config, zkClient)) @@ -156,12 +181,23 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, zkClient = new ZkClient(config.zkConnect, config.zkSessionTimeoutMs, config.zkConnectionTimeoutMs, ZKStringSerializer) } - def shutdown() { - rebalanceLock synchronized { - val canShutdown = isShuttingDown.compareAndSet(false, true); - if (canShutdown) { - info("ZKConsumerConnector shutting down") + // Blocks until the offset manager is located and a channel is established to it. 
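// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: the commit-status folding done
// by commitOffsets() below, reduced to standalone Scala. The numeric error
// codes are hypothetical stand-ins for the ErrorMapping constants referenced
// in the real code, and partitions are plain strings for brevity.
// ---------------------------------------------------------------------------
object CommitStatusFoldSketch {
  val NoError: Short = 0
  val OffsetMetadataTooLarge: Short = 12       // the one error that is not worth retrying
  val ConsumerCoordinatorNotAvailable: Short = 15
  val NotCoordinatorForConsumer: Short = 16    // both coordinator errors force a coordinator refresh

  // returns (commitFailed, retryableIfFailed, shouldRefreshCoordinator, errorCount)
  def summarize(commitStatus: Map[String, Short]): (Boolean, Boolean, Boolean, Int) =
    commitStatus.foldLeft((false, false, false, 0)) { case ((failed, retryable, refresh, errors), (_, code)) =>
      (failed || code != NoError,
       retryable || (code != NoError && code != OffsetMetadataTooLarge),
       refresh || code == NotCoordinatorForConsumer || code == ConsumerCoordinatorNotAvailable,
       errors + (if (code != NoError) 1 else 0))
    }

  def main(args: Array[String]): Unit = {
    val status = Map("t0-0" -> NoError, "t0-1" -> NotCoordinatorForConsumer)
    println(summarize(status)) // (true,true,true,1): failed, worth retrying, refresh the coordinator
  }
}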
+ private def ensureOffsetManagerConnected() { + if (config.offsetsStorage == "kafka") { + if (offsetsChannel == null || !offsetsChannel.isConnected) + offsetsChannel = ClientUtils.channelToOffsetManager(config.groupId, zkClient, config.offsetsChannelSocketTimeoutMs, config.offsetsChannelBackoffMs) + debug("Connected to offset manager %s:%d.".format(offsetsChannel.host, offsetsChannel.port)) + } + } + + def shutdown() { + val canShutdown = isShuttingDown.compareAndSet(false, true) + if (canShutdown) { + info("ZKConsumerConnector shutting down") + val startTime = System.nanoTime() + KafkaMetricsGroup.removeAllConsumerMetrics(config.clientId) + rebalanceLock synchronized { if (wildcardTopicWatcher != null) wildcardTopicWatcher.shutdown() try { @@ -173,16 +209,18 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, } sendShutdownToAllQueues() if (config.autoCommitEnable) - commitOffsets() + commitOffsets(true) if (zkClient != null) { zkClient.close() zkClient = null } + + if (offsetsChannel != null) offsetsChannel.disconnect() } catch { case e: Throwable => fatal("error during consumer connector shutdown", e) } - info("ZKConsumerConnector shut down completed") + info("ZKConsumerConnector shutdown completed in " + (System.nanoTime() - startTime) / 1000000 + " ms") } } } @@ -229,7 +267,7 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, } private def sendShutdownToAllQueues() = { - for (queue <- topicThreadIdAndQueues.values) { + for (queue <- topicThreadIdAndQueues.values.toSet[BlockingQueue[FetchedDataChunk]]) { debug("Clearing up queue") queue.clear() queue.put(ZookeeperConsumerConnector.shutdownCommand) @@ -240,7 +278,7 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, def autoCommit() { trace("auto committing") try { - commitOffsets() + commitOffsets(isAutoCommit = false) } catch { case t: Throwable => @@ -249,30 +287,190 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, } } - def commitOffsets() { - if (zkClient == null) { - error("zk client is null. Cannot commit offsets") - return + def commitOffsetToZooKeeper(topicPartition: TopicAndPartition, offset: Long) { + if (checkpointedZkOffsets.get(topicPartition) != offset) { + val topicDirs = new ZKGroupTopicDirs(config.groupId, topicPartition.topic) + updatePersistentPath(zkClient, topicDirs.consumerOffsetDir + "/" + topicPartition.partition, offset.toString) + checkpointedZkOffsets.put(topicPartition, offset) + zkCommitMeter.mark() } - for ((topic, infos) <- topicRegistry) { - val topicDirs = new ZKGroupTopicDirs(config.groupId, topic) - for (info <- infos.values) { - val newOffset = info.getConsumeOffset - if (newOffset != checkpointedOffsets.get(TopicAndPartition(topic, info.partitionId))) { + } + + /** + * KAFKA-1743: This method added for backward compatibility. 
+ */ + def commitOffsets { commitOffsets(true) } + + def commitOffsets(isAutoCommit: Boolean) { + commitOffsets(isAutoCommit, null) + } + + def commitOffsets(isAutoCommit: Boolean, + topicPartitionOffsets: immutable.Map[TopicAndPartition, OffsetAndMetadata]) { + var retriesRemaining = 1 + (if (isAutoCommit) config.offsetsCommitMaxRetries else 0) // no retries for commits from auto-commit + var done = false + + while (!done) { + val committed = offsetsChannelLock synchronized { // committed when we receive either no error codes or only MetadataTooLarge errors + val offsetsToCommit = if (topicPartitionOffsets == null) {immutable.Map(topicRegistry.flatMap { case (topic, partitionTopicInfos) => + partitionTopicInfos.map { case (partition, info) => + TopicAndPartition(info.topic, info.partitionId) -> OffsetAndMetadata(info.getConsumeOffset()) + } + }.toSeq:_*)} else topicPartitionOffsets + + if (offsetsToCommit.size > 0) { + if (config.offsetsStorage == "zookeeper") { + offsetsToCommit.foreach { case(topicAndPartition, offsetAndMetadata) => + commitOffsetToZooKeeper(topicAndPartition, offsetAndMetadata.offset) + } + true + } else { + val offsetCommitRequest = OffsetCommitRequest(config.groupId, offsetsToCommit, clientId = config.clientId) + ensureOffsetManagerConnected() + try { + kafkaCommitMeter.mark(offsetsToCommit.size) + offsetsChannel.send(offsetCommitRequest) + val offsetCommitResponse = OffsetCommitResponse.readFrom(offsetsChannel.receive().buffer) + trace("Offset commit response: %s.".format(offsetCommitResponse)) + + val (commitFailed, retryableIfFailed, shouldRefreshCoordinator, errorCount) = { + offsetCommitResponse.commitStatus.foldLeft(false, false, false, 0) { case(folded, (topicPartition, errorCode)) => + + if (errorCode == ErrorMapping.NoError && config.dualCommitEnabled) { + val offset = offsetsToCommit(topicPartition).offset + commitOffsetToZooKeeper(topicPartition, offset) + } + + (folded._1 || // update commitFailed + errorCode != ErrorMapping.NoError, + + folded._2 || // update retryableIfFailed - (only metadata too large is not retryable) + (errorCode != ErrorMapping.NoError && errorCode != ErrorMapping.OffsetMetadataTooLargeCode), + + folded._3 || // update shouldRefreshCoordinator + errorCode == ErrorMapping.NotCoordinatorForConsumerCode || + errorCode == ErrorMapping.ConsumerCoordinatorNotAvailableCode, + + // update error count + folded._4 + (if (errorCode != ErrorMapping.NoError) 1 else 0)) + } + } + debug(errorCount + " errors in offset commit response.") + + + if (shouldRefreshCoordinator) { + debug("Could not commit offsets (because offset coordinator has moved or is unavailable).") + offsetsChannel.disconnect() + } + + if (commitFailed && retryableIfFailed) + false + else + true + } + catch { + case t: Throwable => + error("Error while committing offsets.", t) + offsetsChannel.disconnect() + false + } + } + } else { + debug("No updates to offsets since last commit.") + true + } + } + + done = if (isShuttingDown.get() && isAutoCommit) { // should not retry indefinitely if shutting down + retriesRemaining -= 1 + retriesRemaining == 0 || committed + } else + true + + if (!done) { + debug("Retrying offset commit in %d ms".format(config.offsetsChannelBackoffMs)) + Thread.sleep(config.offsetsChannelBackoffMs) + } + } + } + + private def fetchOffsetFromZooKeeper(topicPartition: TopicAndPartition) = { + val dirs = new ZKGroupTopicDirs(config.groupId, topicPartition.topic) + val offsetString = readDataMaybeNull(zkClient, dirs.consumerOffsetDir + "/" + 
topicPartition.partition)._1 + offsetString match { + case Some(offsetStr) => (topicPartition, OffsetMetadataAndError(offsetStr.toLong, OffsetAndMetadata.NoMetadata, ErrorMapping.NoError)) + case None => (topicPartition, OffsetMetadataAndError.NoOffset) + } + } + + private def fetchOffsets(partitions: Seq[TopicAndPartition]) = { + if (partitions.isEmpty) + Some(OffsetFetchResponse(Map.empty)) + else if (config.offsetsStorage == "zookeeper") { + val offsets = partitions.map(fetchOffsetFromZooKeeper) + Some(OffsetFetchResponse(immutable.Map(offsets:_*))) + } else { + val offsetFetchRequest = OffsetFetchRequest(groupId = config.groupId, requestInfo = partitions, clientId = config.clientId) + + var offsetFetchResponseOpt: Option[OffsetFetchResponse] = None + while (!isShuttingDown.get && !offsetFetchResponseOpt.isDefined) { + offsetFetchResponseOpt = offsetsChannelLock synchronized { + ensureOffsetManagerConnected() try { - updatePersistentPath(zkClient, topicDirs.consumerOffsetDir + "/" + info.partitionId, newOffset.toString) - checkpointedOffsets.put(TopicAndPartition(topic, info.partitionId), newOffset) - } catch { - case t: Throwable => - // log it and let it go - warn("exception during commitOffsets", t) + offsetsChannel.send(offsetFetchRequest) + val offsetFetchResponse = OffsetFetchResponse.readFrom(offsetsChannel.receive().buffer) + trace("Offset fetch response: %s.".format(offsetFetchResponse)) + + val (leaderChanged, loadInProgress) = + offsetFetchResponse.requestInfo.foldLeft(false, false) { case(folded, (topicPartition, offsetMetadataAndError)) => + (folded._1 || (offsetMetadataAndError.error == ErrorMapping.NotCoordinatorForConsumerCode), + folded._2 || (offsetMetadataAndError.error == ErrorMapping.OffsetsLoadInProgressCode)) + } + + if (leaderChanged) { + offsetsChannel.disconnect() + debug("Could not fetch offsets (because offset manager has moved).") + None // retry + } + else if (loadInProgress) { + debug("Could not fetch offsets (because offset cache is being loaded).") + None // retry + } + else { + if (config.dualCommitEnabled) { + // if dual-commit is enabled (i.e., if a consumer group is migrating offsets to kafka), then pick the + // maximum between offsets in zookeeper and kafka. + val kafkaOffsets = offsetFetchResponse.requestInfo + val mostRecentOffsets = kafkaOffsets.map { case (topicPartition, kafkaOffset) => + val zkOffset = fetchOffsetFromZooKeeper(topicPartition)._2.offset + val mostRecentOffset = zkOffset.max(kafkaOffset.offset) + (topicPartition, OffsetMetadataAndError(mostRecentOffset, kafkaOffset.metadata, ErrorMapping.NoError)) + } + Some(OffsetFetchResponse(mostRecentOffsets)) + } + else + Some(offsetFetchResponse) + } } - debug("Committed offset " + newOffset + " for topic " + info) + catch { + case e: Exception => + warn("Error while fetching offsets from %s:%d. 
Possible cause: %s".format(offsetsChannel.host, offsetsChannel.port, e.getMessage)) + offsetsChannel.disconnect() + None // retry + } + } + + if (offsetFetchResponseOpt.isEmpty) { + debug("Retrying offset fetch in %d ms".format(config.offsetsChannelBackoffMs)) + Thread.sleep(config.offsetsChannelBackoffMs) } } + + offsetFetchResponseOpt } } + class ZKSessionExpireListener(val dirs: ZKGroupDirs, val consumerIdString: String, val topicCount: TopicCount, @@ -333,9 +531,22 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, class ZKRebalancerListener(val group: String, val consumerIdString: String, val kafkaMessageAndMetadataStreams: mutable.Map[String,List[KafkaStream[_,_]]]) extends IZkChildListener { + + private val partitionAssignor = PartitionAssignor.createInstance(config.partitionAssignmentStrategy) + private var isWatcherTriggered = false private val lock = new ReentrantLock private val cond = lock.newCondition() + + @volatile private var allTopicsOwnedPartitionsCount = 0 + newGauge("OwnedPartitionsCount", + new Gauge[Int] { + def value() = allTopicsOwnedPartitionsCount + }, + Map("clientId" -> config.clientId, "groupId" -> config.groupId)) + + private def ownedPartitionsCountMetricTags(topic: String) = Map("clientId" -> config.clientId, "groupId" -> config.groupId, "topic" -> topic) + private val watcherExecutorThread = new Thread(consumerIdString + "_watcher_executor") { override def run() { info("starting watcher executor thread for consumer " + consumerIdString) @@ -384,10 +595,13 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, private def releasePartitionOwnership(localTopicRegistry: Pool[String, Pool[Int, PartitionTopicInfo]])= { info("Releasing partition ownership") for ((topic, infos) <- localTopicRegistry) { - for(partition <- infos.keys) + for(partition <- infos.keys) { deletePartitionOwnershipFromZK(topic, partition) + } + removeMetric("OwnedPartitionsCount", ownedPartitionsCountMetricTags(topic)) localTopicRegistry.remove(topic) } + allTopicsOwnedPartitionsCount = 0 } def resetState() { @@ -396,35 +610,37 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, def syncedRebalance() { rebalanceLock synchronized { - if(isShuttingDown.get()) { - return - } else { - for (i <- 0 until config.rebalanceMaxRetries) { - info("begin rebalancing consumer " + consumerIdString + " try #" + i) - var done = false - var cluster: Cluster = null - try { - cluster = getCluster(zkClient) - done = rebalance(cluster) - } catch { - case e: Throwable => - /** occasionally, we may hit a ZK exception because the ZK state is changing while we are iterating. - * For example, a ZK node can disappear between the time we get all children and the time we try to get - * the value of a child. Just let this go since another rebalance will be triggered. - **/ - info("exception during rebalance ", e) - } - info("end rebalancing consumer " + consumerIdString + " try #" + i) - if (done) { - return - } else { - /* Here the cache is at a risk of being stale. To take future rebalancing decisions correctly, we should - * clear the cache */ - info("Rebalancing attempt failed. 
Clearing the cache before the next rebalancing operation is triggered") + rebalanceTimer.time { + if(isShuttingDown.get()) { + return + } else { + for (i <- 0 until config.rebalanceMaxRetries) { + info("begin rebalancing consumer " + consumerIdString + " try #" + i) + var done = false + var cluster: Cluster = null + try { + cluster = getCluster(zkClient) + done = rebalance(cluster) + } catch { + case e: Throwable => + /** occasionally, we may hit a ZK exception because the ZK state is changing while we are iterating. + * For example, a ZK node can disappear between the time we get all children and the time we try to get + * the value of a child. Just let this go since another rebalance will be triggered. + **/ + info("exception during rebalance ", e) + } + info("end rebalancing consumer " + consumerIdString + " try #" + i) + if (done) { + return + } else { + /* Here the cache is at a risk of being stale. To take future rebalancing decisions correctly, we should + * clear the cache */ + info("Rebalancing attempt failed. Clearing the cache before the next rebalancing operation is triggered") + } + // stop all fetchers and clear all the queues to avoid data duplication + closeFetchersForQueues(cluster, kafkaMessageAndMetadataStreams, topicThreadIdAndQueues.map(q => q._2)) + Thread.sleep(config.rebalanceBackoffMs) } - // stop all fetchers and clear all the queues to avoid data duplication - closeFetchersForQueues(cluster, kafkaMessageAndMetadataStreams, topicThreadIdAndQueues.map(q => q._2)) - Thread.sleep(config.rebalanceBackoffMs) } } } @@ -433,8 +649,8 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, } private def rebalance(cluster: Cluster): Boolean = { - val myTopicThreadIdsMap = TopicCount.constructTopicCount(group, consumerIdString, zkClient).getConsumerThreadIdsPerTopic - val consumersPerTopicMap = getConsumersPerTopic(zkClient, group) + val myTopicThreadIdsMap = TopicCount.constructTopicCount( + group, consumerIdString, zkClient, config.excludeInternalTopics).getConsumerThreadIdsPerTopic val brokers = getAllBrokersInCluster(zkClient) if (brokers.size == 0) { // This can happen in a rare case when there are no brokers available in the cluster when the consumer is started. @@ -445,9 +661,6 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, true } else { - val partitionsAssignmentPerTopicMap = getPartitionAssignmentForTopics(zkClient, myTopicThreadIdsMap.keySet.toSeq) - val partitionsPerTopicMap = partitionsAssignmentPerTopicMap.map(p => (p._1, p._2.keySet.toSeq.sorted)) - /** * fetchers must be stopped to avoid data duplication, since if the current * rebalancing attempt fails, the partitions that are released could be owned by another consumer. @@ -455,62 +668,61 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, * partitions in parallel. So, not stopping the fetchers leads to duplicate data. 
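// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: the dual-commit reconciliation
// performed in fetchOffsets() above while a consumer group is migrating its
// offsets to Kafka, i.e. keep the larger of the ZooKeeper and Kafka offsets
// per partition. Partitions are modelled as plain strings here.
// ---------------------------------------------------------------------------
object DualCommitOffsetSketch {
  def reconcile(kafkaOffsets: Map[String, Long], zkOffsets: Map[String, Long]): Map[String, Long] =
    kafkaOffsets.map { case (partition, kafkaOffset) =>
      // a missing ZooKeeper offset is treated as "no offset" (-1), so the Kafka offset wins
      partition -> kafkaOffset.max(zkOffsets.getOrElse(partition, -1L))
    }

  def main(args: Array[String]): Unit = {
    val kafka = Map("t0-0" -> 42L, "t0-1" -> 10L)
    val zk    = Map("t0-0" -> 40L, "t0-1" -> 15L)
    println(reconcile(kafka, zk)) // Map(t0-0 -> 42, t0-1 -> 15)
  }
}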
*/ closeFetchers(cluster, kafkaMessageAndMetadataStreams, myTopicThreadIdsMap) - + if (consumerRebalanceListener != null) { + info("Calling beforeReleasingPartitions() from rebalance listener.") + consumerRebalanceListener.beforeReleasingPartitions( + if (topicRegistry.size == 0) + new java.util.HashMap[String, java.util.Set[java.lang.Integer]] + else + mapAsJavaMap(topicRegistry.map(topics => + topics._1 -> topics._2.keys + ).toMap).asInstanceOf[java.util.Map[String, java.util.Set[java.lang.Integer]]] + ) + } releasePartitionOwnership(topicRegistry) + val assignmentContext = new AssignmentContext(group, consumerIdString, config.excludeInternalTopics, zkClient) + val partitionOwnershipDecision = partitionAssignor.assign(assignmentContext) + val currentTopicRegistry = new Pool[String, Pool[Int, PartitionTopicInfo]]( + valueFactory = Some((topic: String) => new Pool[Int, PartitionTopicInfo])) - var partitionOwnershipDecision = new collection.mutable.HashMap[(String, Int), String]() - val currentTopicRegistry = new Pool[String, Pool[Int, PartitionTopicInfo]] - - for ((topic, consumerThreadIdSet) <- myTopicThreadIdsMap) { - currentTopicRegistry.put(topic, new Pool[Int, PartitionTopicInfo]) - - val topicDirs = new ZKGroupTopicDirs(group, topic) - val curConsumers = consumersPerTopicMap.get(topic).get - val curPartitions: Seq[Int] = partitionsPerTopicMap.get(topic).get + // fetch current offsets for all topic-partitions + val topicPartitions = partitionOwnershipDecision.keySet.toSeq - val nPartsPerConsumer = curPartitions.size / curConsumers.size - val nConsumersWithExtraPart = curPartitions.size % curConsumers.size + val offsetFetchResponseOpt = fetchOffsets(topicPartitions) - info("Consumer " + consumerIdString + " rebalancing the following partitions: " + curPartitions + - " for topic " + topic + " with consumers: " + curConsumers) - - for (consumerThreadId <- consumerThreadIdSet) { - val myConsumerPosition = curConsumers.indexOf(consumerThreadId) - assert(myConsumerPosition >= 0) - val startPart = nPartsPerConsumer*myConsumerPosition + myConsumerPosition.min(nConsumersWithExtraPart) - val nParts = nPartsPerConsumer + (if (myConsumerPosition + 1 > nConsumersWithExtraPart) 0 else 1) + if (isShuttingDown.get || !offsetFetchResponseOpt.isDefined) + false + else { + val offsetFetchResponse = offsetFetchResponseOpt.get + topicPartitions.foreach(topicAndPartition => { + val (topic, partition) = topicAndPartition.asTuple + val offset = offsetFetchResponse.requestInfo(topicAndPartition).offset + val threadId = partitionOwnershipDecision(topicAndPartition) + addPartitionTopicInfo(currentTopicRegistry, partition, topic, offset, threadId) + }) - /** - * Range-partition the sorted partitions to consumers for better locality. - * The first few consumers pick up an extra partition, if any. 
- */ - if (nParts <= 0) - warn("No broker partitions consumed by consumer thread " + consumerThreadId + " for topic " + topic) - else { - for (i <- startPart until startPart + nParts) { - val partition = curPartitions(i) - info(consumerThreadId + " attempting to claim partition " + partition) - addPartitionTopicInfo(currentTopicRegistry, topicDirs, partition, topic, consumerThreadId) - // record the partition ownership decision - partitionOwnershipDecision += ((topic, partition) -> consumerThreadId) - } + /** + * move the partition ownership here, since that can be used to indicate a truly successful rebalancing attempt + * A rebalancing attempt is completed successfully only after the fetchers have been started correctly + */ + if(reflectPartitionOwnershipDecision(partitionOwnershipDecision)) { + allTopicsOwnedPartitionsCount = partitionOwnershipDecision.size + + partitionOwnershipDecision.view.groupBy { case(topicPartition, consumerThreadId) => topicPartition.topic } + .foreach { case (topic, partitionThreadPairs) => + newGauge("OwnedPartitionsCount", + new Gauge[Int] { + def value() = partitionThreadPairs.size + }, + ownedPartitionsCountMetricTags(topic)) } - } - } - /** - * move the partition ownership here, since that can be used to indicate a truly successful rebalancing attempt - * A rebalancing attempt is completed successfully only after the fetchers have been started correctly - */ - if(reflectPartitionOwnershipDecision(partitionOwnershipDecision.toMap)) { - info("Updating the cache") - debug("Partitions per topic cache " + partitionsPerTopicMap) - debug("Consumers per topic cache " + consumersPerTopicMap) - topicRegistry = currentTopicRegistry - updateFetcher(cluster) - true - } else { - false + topicRegistry = currentTopicRegistry + updateFetcher(cluster) + true + } else { + false + } } } } @@ -523,7 +735,6 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, case Some(f) => f.stopConnections clearFetcherQueues(allPartitionInfos, cluster, queuesToBeCleared, messageStreams) - info("Committing all offsets after clearing the fetcher queues") /** * here, we need to commit offsets before stopping the consumer from returning any more messages * from the current data chunk. 
Since partition ownership is not yet released, this commit offsets @@ -532,8 +743,10 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, * by the consumer, there will be no more messages returned by this iterator until the rebalancing finishes * successfully and the fetchers restart to fetch more data chunks **/ - if (config.autoCommitEnable) - commitOffsets + if (config.autoCommitEnable) { + info("Committing all offsets after clearing the fetcher queues") + commitOffsets(true) + } case None => } } @@ -555,7 +768,7 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, } private def closeFetchers(cluster: Cluster, messageStreams: Map[String,List[KafkaStream[_,_]]], - relevantTopicThreadIdsMap: Map[String, Set[String]]) { + relevantTopicThreadIdsMap: Map[String, Set[ConsumerThreadId]]) { // only clear the fetcher queues for certain topic partitions that *might* no longer be served by this consumer // after this rebalancing attempt val queuesTobeCleared = topicThreadIdAndQueues.filter(q => relevantTopicThreadIdsMap.contains(q._1._1)).map(q => q._2) @@ -578,15 +791,15 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, } } - private def reflectPartitionOwnershipDecision(partitionOwnershipDecision: Map[(String, Int), String]): Boolean = { + private def reflectPartitionOwnershipDecision(partitionOwnershipDecision: Map[TopicAndPartition, ConsumerThreadId]): Boolean = { var successfullyOwnedPartitions : List[(String, Int)] = Nil val partitionOwnershipSuccessful = partitionOwnershipDecision.map { partitionOwner => - val topic = partitionOwner._1._1 - val partition = partitionOwner._1._2 + val topic = partitionOwner._1.topic + val partition = partitionOwner._1.partition val consumerThreadId = partitionOwner._2 val partitionOwnerPath = getConsumerPartitionOwnerPath(group, topic, partition) try { - createEphemeralPathExpectConflict(zkClient, partitionOwnerPath, consumerThreadId) + createEphemeralPathExpectConflict(zkClient, partitionOwnerPath, consumerThreadId.toString) info(consumerThreadId + " successfully owned partition " + partition + " for topic " + topic) successfullyOwnedPartitions ::= (topic, partition) true @@ -609,18 +822,10 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, } private def addPartitionTopicInfo(currentTopicRegistry: Pool[String, Pool[Int, PartitionTopicInfo]], - topicDirs: ZKGroupTopicDirs, partition: Int, - topic: String, consumerThreadId: String) { - val partTopicInfoMap = currentTopicRegistry.get(topic) - - val znode = topicDirs.consumerOffsetDir + "/" + partition - val offsetString = readDataMaybeNull(zkClient, znode)._1 - // If first time starting a consumer, set the initial offset to -1 - val offset = - offsetString match { - case Some(offsetStr) => offsetStr.toLong - case None => PartitionTopicInfo.InvalidOffset - } + partition: Int, topic: String, + offset: Long, consumerThreadId: ConsumerThreadId) { + val partTopicInfoMap = currentTopicRegistry.getAndMaybePut(topic) + val queue = topicThreadIdAndQueues.get((topic, consumerThreadId)) val consumedOffset = new AtomicLong(offset) val fetchedOffset = new AtomicLong(offset) @@ -633,7 +838,7 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, config.clientId) partTopicInfoMap.put(partition, partTopicInfo) debug(partTopicInfo + " selected new offset " + offset) - checkpointedOffsets.put(TopicAndPartition(topic, partition), offset) + checkpointedZkOffsets.put(TopicAndPartition(topic, 
partition), offset) } } @@ -662,7 +867,7 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, val topicStreamsMap = loadBalancerListener.kafkaMessageAndMetadataStreams // map of {topic -> Set(thread-1, thread-2, ...)} - val consumerThreadIdsPerTopic: Map[String, Set[String]] = + val consumerThreadIdsPerTopic: Map[String, Set[ConsumerThreadId]] = topicCount.getConsumerThreadIdsPerTopic val allQueuesAndStreams = topicCount match { @@ -692,10 +897,13 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, topicThreadIdAndQueues.put(topicThreadId, q) debug("Adding topicThreadId %s and queue %s to topicThreadIdAndQueues data structure".format(topicThreadId, q.toString)) newGauge( - config.clientId + "-" + config.groupId + "-" + topicThreadId._1 + "-" + topicThreadId._2 + "-FetchQueueSize", + "FetchQueueSize", new Gauge[Int] { def value = q.size - } + }, + Map("clientId" -> config.clientId, + "topic" -> topicThreadId._1, + "threadId" -> topicThreadId._2.threadId.toString) ) }) @@ -735,10 +943,10 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, private val wildcardQueuesAndStreams = (1 to numStreams) .map(e => { val queue = new LinkedBlockingQueue[FetchedDataChunk](config.queuedMaxMessages) - val stream = new KafkaStream[K,V](queue, - config.consumerTimeoutMs, - keyDecoder, - valueDecoder, + val stream = new KafkaStream[K,V](queue, + config.consumerTimeoutMs, + keyDecoder, + valueDecoder, config.clientId) (queue, stream) }).toList @@ -746,10 +954,10 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, // bootstrap with existing topics private var wildcardTopics = getChildrenParentMayNotExist(zkClient, BrokerTopicsPath) - .filter(topicFilter.isTopicAllowed) + .filter(topic => topicFilter.isTopicAllowed(topic, config.excludeInternalTopics)) private val wildcardTopicCount = TopicCount.constructTopicCount( - consumerIdString, topicFilter, numStreams, zkClient) + consumerIdString, topicFilter, numStreams, zkClient, config.excludeInternalTopics) val dirs = new ZKGroupDirs(config.groupId) registerConsumerInZK(dirs, consumerIdString, wildcardTopicCount) @@ -764,7 +972,7 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, def handleTopicEvent(allTopics: Seq[String]) { debug("Handling topic event") - val updatedTopics = allTopics.filter(topicFilter.isTopicAllowed) + val updatedTopics = allTopics.filter(topic => topicFilter.isTopicAllowed(topic, config.excludeInternalTopics)) val addedTopics = updatedTopics filterNot (wildcardTopics contains) if (addedTopics.nonEmpty) @@ -792,4 +1000,3 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, wildcardQueuesAndStreams.map(_._2) } } - diff --git a/core/src/main/scala/kafka/controller/ControllerChannelManager.scala b/core/src/main/scala/kafka/controller/ControllerChannelManager.scala index a1ee5a7074121..eb492f0044974 100644 --- a/core/src/main/scala/kafka/controller/ControllerChannelManager.scala +++ b/core/src/main/scala/kafka/controller/ControllerChannelManager.scala @@ -114,7 +114,7 @@ class RequestSendThread(val controllerId: Int, val channel: BlockingChannel) extends ShutdownableThread("Controller-%d-to-broker-%d-send-thread".format(controllerId, toBroker.id)) { private val lock = new Object() - private val stateChangeLogger = Logger.getLogger(KafkaController.stateChangeLogger) + private val stateChangeLogger = KafkaController.stateChangeLogger connectToBroker(toBroker, channel) override def doWork(): 
Unit = { @@ -130,12 +130,13 @@ class RequestSendThread(val controllerId: Int, // removeBroker which will invoke shutdown() on this thread. At that point, we will stop retrying. try { channel.send(request) + receive = channel.receive() isSendSuccessful = true } catch { - case e => // if the send was not successful, reconnect to broker and resend the message - error(("Controller %d epoch %d failed to send %s request with correlation id %s to broker %s. " + + case e: Throwable => // if the send was not successful, reconnect to broker and resend the message + warn(("Controller %d epoch %d fails to send request %s to broker %s. " + "Reconnecting to broker.").format(controllerId, controllerContext.epoch, - RequestKeys.nameForKey(request.requestId.get), request.correlationId, toBroker.toString()), e) + request.toString, toBroker.toString()), e) channel.disconnect() connectToBroker(toBroker, channel) isSendSuccessful = false @@ -143,7 +144,6 @@ class RequestSendThread(val controllerId: Int, Utils.swallow(Thread.sleep(300)) } } - receive = channel.receive() var response: RequestOrResponse = null request.requestId.get match { case RequestKeys.LeaderAndIsrKey => @@ -153,8 +153,8 @@ class RequestSendThread(val controllerId: Int, case RequestKeys.UpdateMetadataKey => response = UpdateMetadataResponse.readFrom(receive.buffer) } - stateChangeLogger.trace("Controller %d epoch %d received response correlationId %d for a request sent to broker %s" - .format(controllerId, controllerContext.epoch, response.correlationId, toBroker.toString())) + stateChangeLogger.trace("Controller %d epoch %d received response %s for a request sent to broker %s" + .format(controllerId, controllerContext.epoch, response.toString, toBroker.toString)) if(callback != null) { callback(response) @@ -162,7 +162,7 @@ class RequestSendThread(val controllerId: Int, } } catch { case e: Throwable => - warn("Controller %d fails to send a request to broker %s".format(controllerId, toBroker.toString()), e) + error("Controller %d fails to send a request to broker %s".format(controllerId, toBroker.toString()), e) // If there is any socket error (eg, socket timeout), the channel is no longer usable and needs to be recreated. 
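// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: the resend-on-socket-error loop
// used by RequestSendThread above, reduced to standalone Scala. The real
// thread disconnects and reconnects the controller channel between attempts
// and also stops once it is shut down; both are simulated by a plain sleep here.
// ---------------------------------------------------------------------------
object ResendOnSocketErrorSketch {
  def sendUntilSuccessful(maxAttempts: Int, pauseMs: Long)(send: () => Boolean): Boolean = {
    var attempt = 0
    var sent = false
    while (!sent && attempt < maxAttempts) {
      attempt += 1
      sent = try send() catch {
        case _: Throwable =>
          // a failed send invalidates the channel: back off, "reconnect", retry
          Thread.sleep(pauseMs)
          false
      }
    }
    sent
  }

  def main(args: Array[String]): Unit = {
    var calls = 0
    val ok = sendUntilSuccessful(maxAttempts = 5, pauseMs = 10) { () =>
      calls += 1
      if (calls < 3) throw new java.io.IOException("socket error") else true
    }
    println(ok) // true, after two simulated socket errors
  }
}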
channel.disconnect() } @@ -173,7 +173,7 @@ class RequestSendThread(val controllerId: Int, channel.connect() info("Controller %d connected to %s for sending state change requests".format(controllerId, broker.toString())) } catch { - case e => { + case e: Throwable => { channel.disconnect() error("Controller %d's connection to broker %s was unsuccessful".format(controllerId, broker.toString()), e) } @@ -188,7 +188,7 @@ class ControllerBrokerRequestBatch(controller: KafkaController) extends Logging val leaderAndIsrRequestMap = new mutable.HashMap[Int, mutable.HashMap[(String, Int), PartitionStateInfo]] val stopReplicaRequestMap = new mutable.HashMap[Int, Seq[StopReplicaRequestInfo]] val updateMetadataRequestMap = new mutable.HashMap[Int, mutable.HashMap[TopicAndPartition, PartitionStateInfo]] - private val stateChangeLogger = Logger.getLogger(KafkaController.stateChangeLogger) + private val stateChangeLogger = KafkaController.stateChangeLogger def newBatch() { // raise error if the previous batch is not empty @@ -206,12 +206,17 @@ class ControllerBrokerRequestBatch(controller: KafkaController) extends Logging def addLeaderAndIsrRequestForBrokers(brokerIds: Seq[Int], topic: String, partition: Int, leaderIsrAndControllerEpoch: LeaderIsrAndControllerEpoch, replicas: Seq[Int], callback: (RequestOrResponse) => Unit = null) { - brokerIds.filter(b => b >= 0).foreach { brokerId => - leaderAndIsrRequestMap.getOrElseUpdate(brokerId, new mutable.HashMap[(String, Int), PartitionStateInfo]) - leaderAndIsrRequestMap(brokerId).put((topic, partition), - PartitionStateInfo(leaderIsrAndControllerEpoch, replicas.toSet)) + val topicAndPartition: TopicAndPartition = TopicAndPartition(topic, partition) + + brokerIds.filter(b => b >= 0).foreach { + brokerId => + leaderAndIsrRequestMap.getOrElseUpdate(brokerId, new mutable.HashMap[(String, Int), PartitionStateInfo]) + leaderAndIsrRequestMap(brokerId).put((topic, partition), + PartitionStateInfo(leaderIsrAndControllerEpoch, replicas.toSet)) } - addUpdateMetadataRequestForBrokers(controllerContext.liveOrShuttingDownBrokerIds.toSeq) + + addUpdateMetadataRequestForBrokers(controllerContext.liveOrShuttingDownBrokerIds.toSeq, + Set(topicAndPartition)) } def addStopReplicaRequestForBrokers(brokerIds: Seq[Int], topic: String, partition: Int, deletePartition: Boolean, @@ -228,27 +233,42 @@ class ControllerBrokerRequestBatch(controller: KafkaController) extends Logging } } - /* Send UpdateMetadataRequest to the given brokers for all partitions except those being deleted as part of delete topic - * - */ + /** Send UpdateMetadataRequest to the given brokers for the given partitions and partitions that are being deleted */ def addUpdateMetadataRequestForBrokers(brokerIds: Seq[Int], + partitions: collection.Set[TopicAndPartition] = Set.empty[TopicAndPartition], callback: (RequestOrResponse) => Unit = null) { - val partitionList = controllerContext.partitionLeadershipInfo.keySet.dropWhile( - p => controller.deleteTopicManager.isTopicQueuedUpForDeletion(p.topic)) - partitionList.foreach { partition => + def updateMetadataRequestMapFor(partition: TopicAndPartition, beingDeleted: Boolean) { val leaderIsrAndControllerEpochOpt = controllerContext.partitionLeadershipInfo.get(partition) leaderIsrAndControllerEpochOpt match { case Some(leaderIsrAndControllerEpoch) => val replicas = controllerContext.partitionReplicaAssignment(partition).toSet - val partitionStateInfo = PartitionStateInfo(leaderIsrAndControllerEpoch, replicas) + val partitionStateInfo = if (beingDeleted) { + val 
leaderAndIsr = new LeaderAndIsr(LeaderAndIsr.LeaderDuringDelete, leaderIsrAndControllerEpoch.leaderAndIsr.isr) + PartitionStateInfo(LeaderIsrAndControllerEpoch(leaderAndIsr, leaderIsrAndControllerEpoch.controllerEpoch), replicas) + } else { + PartitionStateInfo(leaderIsrAndControllerEpoch, replicas) + } brokerIds.filter(b => b >= 0).foreach { brokerId => updateMetadataRequestMap.getOrElseUpdate(brokerId, new mutable.HashMap[TopicAndPartition, PartitionStateInfo]) updateMetadataRequestMap(brokerId).put(partition, partitionStateInfo) } case None => - info("Leader not assigned yet for partition %s. Skip sending udpate metadata request".format(partition)) + info("Leader not yet assigned for partition %s. Skip sending UpdateMetadataRequest.".format(partition)) } } + + val filteredPartitions = { + val givenPartitions = if (partitions.isEmpty) + controllerContext.partitionLeadershipInfo.keySet + else + partitions + if (controller.deleteTopicManager.partitionsToBeDeleted.isEmpty) + givenPartitions + else + givenPartitions -- controller.deleteTopicManager.partitionsToBeDeleted + } + filteredPartitions.foreach(partition => updateMetadataRequestMapFor(partition, beingDeleted = false)) + controller.deleteTopicManager.partitionsToBeDeleted.foreach(partition => updateMetadataRequestMapFor(partition, beingDeleted = true)) } def sendRequestsToBrokers(controllerEpoch: Int, correlationId: Int) { @@ -272,10 +292,10 @@ class ControllerBrokerRequestBatch(controller: KafkaController) extends Logging val broker = m._1 val partitionStateInfos = m._2.toMap val updateMetadataRequest = new UpdateMetadataRequest(controllerId, controllerEpoch, correlationId, clientId, - partitionStateInfos, controllerContext.liveOrShuttingDownBrokers) + partitionStateInfos, controllerContext.liveOrShuttingDownBrokers) partitionStateInfos.foreach(p => stateChangeLogger.trace(("Controller %d epoch %d sending UpdateMetadata request %s with " + "correlationId %d to broker %d for partition %s").format(controllerId, controllerEpoch, p._2.leaderIsrAndControllerEpoch, - correlationId, broker, p._1))) + correlationId, broker, p._1))) controller.sendRequest(broker, updateMetadataRequest, null) } updateMetadataRequestMap.clear() diff --git a/core/src/main/scala/kafka/controller/KafkaController.scala b/core/src/main/scala/kafka/controller/KafkaController.scala index d812cb4121d7f..66df6d2fbdbdd 100644 --- a/core/src/main/scala/kafka/controller/KafkaController.scala +++ b/core/src/main/scala/kafka/controller/KafkaController.scala @@ -21,12 +21,13 @@ import collection.Set import com.yammer.metrics.core.Gauge import java.lang.{IllegalStateException, Object} import java.util.concurrent.TimeUnit +import kafka.admin.AdminUtils import kafka.admin.PreferredReplicaLeaderElectionCommand import kafka.api._ import kafka.cluster.Broker import kafka.common._ +import kafka.log.LogConfig import kafka.metrics.{KafkaTimer, KafkaMetricsGroup} -import kafka.server.{ZookeeperLeaderElector, KafkaConfig} import kafka.utils.ZkUtils._ import kafka.utils._ import kafka.utils.Utils._ @@ -34,10 +35,11 @@ import org.apache.zookeeper.Watcher.Event.KeeperState import org.I0Itec.zkclient.{IZkDataListener, IZkStateListener, ZkClient} import org.I0Itec.zkclient.exception.{ZkNodeExistsException, ZkNoNodeException} import java.util.concurrent.atomic.AtomicInteger -import org.apache.log4j.Logger +import java.util.concurrent.locks.ReentrantLock +import scala.None +import kafka.server._ import scala.Some import kafka.common.TopicAndPartition -import 
java.util.concurrent.locks.ReentrantLock class ControllerContext(val zkClient: ZkClient, val zkSessionTimeout: Int) { @@ -113,22 +115,20 @@ class ControllerContext(val zkClient: ZkClient, } def removeTopic(topic: String) = { - partitionLeadershipInfo = partitionLeadershipInfo.dropWhile(p => p._1.topic.equals(topic)) - partitionReplicaAssignment = partitionReplicaAssignment.dropWhile(p => p._1.topic.equals(topic)) + partitionLeadershipInfo = partitionLeadershipInfo.filter{ case (topicAndPartition, _) => topicAndPartition.topic != topic } + partitionReplicaAssignment = partitionReplicaAssignment.filter{ case (topicAndPartition, _) => topicAndPartition.topic != topic } allTopics -= topic } } -trait KafkaControllerMBean { - def shutdownBroker(id: Int): Set[TopicAndPartition] -} object KafkaController extends Logging { - val MBeanName = "kafka.controller:type=KafkaController,name=ControllerOps" - val stateChangeLogger = "state.change.logger" + val stateChangeLogger = new StateChangeLogger("state.change.logger") val InitialControllerEpoch = 1 val InitialControllerEpochZkVersion = 1 + case class StateChangeLogger(override val loggerName: String) extends Logging + def parseControllerId(controllerInfoString: String): Int = { try { Json.parseFull(controllerInfoString) match { @@ -151,10 +151,10 @@ object KafkaController extends Logging { } } -class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logging with KafkaMetricsGroup with KafkaControllerMBean { +class KafkaController(val config : KafkaConfig, zkClient: ZkClient, val brokerState: BrokerState) extends Logging with KafkaMetricsGroup { this.logIdent = "[Controller " + config.brokerId + "]: " private var isRunning = true - private val stateChangeLogger = Logger.getLogger(KafkaController.stateChangeLogger) + private val stateChangeLogger = KafkaController.stateChangeLogger val controllerContext = new ControllerContext(zkClient, config.zkSessionTimeoutMs) val partitionStateMachine = new PartitionStateMachine(this) val replicaStateMachine = new ReplicaStateMachine(this) @@ -164,12 +164,14 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg // kafka server private val autoRebalanceScheduler = new KafkaScheduler(1) var deleteTopicManager: TopicDeletionManager = null - val offlinePartitionSelector = new OfflinePartitionLeaderSelector(controllerContext) + val offlinePartitionSelector = new OfflinePartitionLeaderSelector(controllerContext, config) private val reassignedPartitionLeaderSelector = new ReassignedPartitionLeaderSelector(controllerContext) private val preferredReplicaPartitionLeaderSelector = new PreferredReplicaPartitionLeaderSelector(controllerContext) private val controlledShutdownPartitionLeaderSelector = new ControlledShutdownLeaderSelector(controllerContext) private val brokerRequestBatch = new ControllerBrokerRequestBatch(this) - registerControllerChangedListener() + + private val partitionReassignedListener = new PartitionsReassignedListener(this) + private val preferredReplicaElectionListener = new PreferredReplicaElectionListener(this) newGauge( "ActiveControllerCount", @@ -234,7 +236,6 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg throw new BrokerNotAvailableException("Broker id %d does not exist.".format(id)) controllerContext.shuttingDownBrokerIds.add(id) - debug("All shutting down brokers: " + controllerContext.shuttingDownBrokerIds.mkString(",")) debug("Live brokers: " + controllerContext.liveBrokerIds.mkString(",")) } @@ -247,31 +248,31 
@@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg allPartitionsAndReplicationFactorOnBroker.foreach { case(topicAndPartition, replicationFactor) => - // Move leadership serially to relinquish lock. - inLock(controllerContext.controllerLock) { - controllerContext.partitionLeadershipInfo.get(topicAndPartition).foreach { currLeaderIsrAndControllerEpoch => - if (currLeaderIsrAndControllerEpoch.leaderAndIsr.leader == id) { - // If the broker leads the topic partition, transition the leader and update isr. Updates zk and - // notifies all affected brokers - partitionStateMachine.handleStateChanges(Set(topicAndPartition), OnlinePartition, - controlledShutdownPartitionLeaderSelector) - } - else { - // Stop the replica first. The state change below initiates ZK changes which should take some time - // before which the stop replica request should be completed (in most cases) - brokerRequestBatch.newBatch() - brokerRequestBatch.addStopReplicaRequestForBrokers(Seq(id), topicAndPartition.topic, - topicAndPartition.partition, deletePartition = false) - brokerRequestBatch.sendRequestsToBrokers(epoch, controllerContext.correlationId.getAndIncrement) - - // If the broker is a follower, updates the isr in ZK and notifies the current leader - replicaStateMachine.handleStateChanges(Set(PartitionAndReplica(topicAndPartition.topic, - topicAndPartition.partition, id)), OfflineReplica) + // Move leadership serially to relinquish lock. + inLock(controllerContext.controllerLock) { + controllerContext.partitionLeadershipInfo.get(topicAndPartition).foreach { currLeaderIsrAndControllerEpoch => + if (replicationFactor > 1) { + if (currLeaderIsrAndControllerEpoch.leaderAndIsr.leader == id) { + // If the broker leads the topic partition, transition the leader and update isr. Updates zk and + // notifies all affected brokers + partitionStateMachine.handleStateChanges(Set(topicAndPartition), OnlinePartition, + controlledShutdownPartitionLeaderSelector) + } else { + // Stop the replica first. 
The state change below initiates ZK changes which should take some time + // before which the stop replica request should be completed (in most cases) + brokerRequestBatch.newBatch() + brokerRequestBatch.addStopReplicaRequestForBrokers(Seq(id), topicAndPartition.topic, + topicAndPartition.partition, deletePartition = false) + brokerRequestBatch.sendRequestsToBrokers(epoch, controllerContext.correlationId.getAndIncrement) + + // If the broker is a follower, updates the isr in ZK and notifies the current leader + replicaStateMachine.handleStateChanges(Set(PartitionAndReplica(topicAndPartition.topic, + topicAndPartition.partition, id)), OfflineReplica) + } + } } } - } } - def replicatedPartitionsBrokerLeads() = inLock(controllerContext.controllerLock) { trace("All leaders = " + controllerContext.partitionLeadershipInfo.mkString(",")) controllerContext.partitionLeadershipInfo.filter { @@ -299,6 +300,8 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg def onControllerFailover() { if(isRunning) { info("Broker %d starting become controller state transition".format(config.brokerId)) + //read controller epoch from zk + readControllerEpochFromZookeeper() // increment the controller epoch incrementControllerEpoch(zkClient) // before reading source of truth from zookeeper, register the listeners to get broker/topic callbacks @@ -311,8 +314,8 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg partitionStateMachine.startup() // register the partition change listeners for all existing topics on failover controllerContext.allTopics.foreach(topic => partitionStateMachine.registerPartitionChangeListener(topic)) - Utils.registerMBean(this, KafkaController.MBeanName) info("Broker %d is ready to serve as the new controller with epoch %d".format(config.brokerId, epoch)) + brokerState.newState(RunningAsController) maybeTriggerPartitionReassignment() maybeTriggerPreferredReplicaElection() /* send partition leadership info to all live brokers */ @@ -334,16 +337,34 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg * required to clean up internal controller data structures */ def onControllerResignation() { - inLock(controllerContext.controllerLock) { - autoRebalanceScheduler.shutdown() + // de-register listeners + deregisterReassignedPartitionsListener() + deregisterPreferredReplicaElectionListener() + + // shutdown delete topic manager + if (deleteTopicManager != null) deleteTopicManager.shutdown() - Utils.unregisterMBean(KafkaController.MBeanName) + + // shutdown leader rebalance scheduler + if (config.autoLeaderRebalanceEnable) + autoRebalanceScheduler.shutdown() + + inLock(controllerContext.controllerLock) { + // de-register partition ISR listener for on-going partition reassignment task + deregisterReassignedPartitionsIsrChangeListeners() + // shutdown partition state machine partitionStateMachine.shutdown() + // shutdown replica state machine replicaStateMachine.shutdown() + // shutdown controller channel manager if(controllerContext.controllerChannelManager != null) { controllerContext.controllerChannelManager.shutdown() controllerContext.controllerChannelManager = null } + // reset controller context + controllerContext.epoch=0 + controllerContext.epochZkVersion=0 + brokerState.newState(RunningAsBroker) } } @@ -433,7 +454,7 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg if(replicasForTopicsToBeDeleted.size > 0) { // it is required to mark the respective replicas in 
TopicDeletionFailed state since the replica cannot be // deleted when the broker is down. This will prevent the replica from being in TopicDeletionStarted state indefinitely - // since topic deletion cannot be retried if at least one replica is in TopicDeletionStarted state + // since topic deletion cannot be retried until at least one replica is in TopicDeletionStarted state deleteTopicManager.failReplicaDeletion(replicasForTopicsToBeDeleted) } } @@ -443,6 +464,7 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg * and partitions as input. It does the following - * 1. Registers partition change listener. This is not required until KAFKA-347 * 2. Invokes the new partition callback + * 3. Send metadata request with the new topic to all brokers so they allow requests for that topic to be served */ def onNewTopicCreation(topics: Set[String], newPartitions: Set[TopicAndPartition]) { info("New topic creation callback for %s".format(newPartitions.mkString(","))) @@ -545,7 +567,7 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg info("Removed partition %s from the list of reassigned partitions in zookeeper".format(topicAndPartition)) controllerContext.partitionsBeingReassigned.remove(topicAndPartition) //12. After electing leader, the replicas and isr information changes, so resend the update metadata request to every broker - sendUpdateMetadataRequest(controllerContext.liveOrShuttingDownBrokerIds.toSeq) + sendUpdateMetadataRequest(controllerContext.liveOrShuttingDownBrokerIds.toSeq, Set(topicAndPartition)) // signal delete topic thread if reassignment for some partitions belonging to topics being deleted just completed deleteTopicManager.resumeDeletionForTopics(Set(topicAndPartition.topic)) } @@ -581,8 +603,8 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg // first register ISR change listener watchIsrChangesForReassignedPartition(topic, partition, reassignedPartitionContext) controllerContext.partitionsBeingReassigned.put(topicAndPartition, reassignedPartitionContext) - // halt topic deletion for the partitions being reassigned - deleteTopicManager.haltTopicDeletion(Set(topic)) + // mark topic ineligible for deletion for the partitions being reassigned + deleteTopicManager.markTopicIneligibleForDeletion(Set(topic)) onPartitionReassignment(topicAndPartition, reassignedPartitionContext) } else { // some replica in RAR is not alive. 
Fail partition reassignment @@ -601,16 +623,16 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg } } - def onPreferredReplicaElection(partitions: Set[TopicAndPartition]) { + def onPreferredReplicaElection(partitions: Set[TopicAndPartition], isTriggeredByAutoRebalance: Boolean = false) { info("Starting preferred replica leader election for partitions %s".format(partitions.mkString(","))) try { controllerContext.partitionsUndergoingPreferredReplicaElection ++= partitions - deleteTopicManager.haltTopicDeletion(partitions.map(_.topic)) + deleteTopicManager.markTopicIneligibleForDeletion(partitions.map(_.topic)) partitionStateMachine.handleStateChanges(partitions, OnlinePartition, preferredReplicaPartitionLeaderSelector) } catch { case e: Throwable => error("Error completing preferred replica leader election for partitions %s".format(partitions.mkString(",")), e) } finally { - removePartitionsFromPreferredReplicaElection(partitions) + removePartitionsFromPreferredReplicaElection(partitions, isTriggeredByAutoRebalance) deleteTopicManager.resumeDeletionForTopics(partitions.map(_.topic)) } } @@ -638,16 +660,8 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg def shutdown() = { inLock(controllerContext.controllerLock) { isRunning = false - partitionStateMachine.shutdown() - replicaStateMachine.shutdown() - if (config.autoLeaderRebalanceEnable) - autoRebalanceScheduler.shutdown() - if(controllerContext.controllerChannelManager != null) { - controllerContext.controllerChannelManager.shutdown() - controllerContext.controllerChannelManager = null - info("Controller shutdown complete") - } } + onControllerResignation() } def sendRequest(brokerId : Int, request : RequestOrResponse, callback: (RequestOrResponse) => Unit = null) = { @@ -748,17 +762,16 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg private def initializeTopicDeletion() { val topicsQueuedForDeletion = ZkUtils.getChildrenParentMayNotExist(zkClient, ZkUtils.DeleteTopicsPath).toSet - val replicasOnDeadBrokers = controllerContext.partitionReplicaAssignment.filter(r => - r._2.foldLeft(false)((res,r) => res || !controllerContext.liveBrokerIds.contains(r))) - val topicsWithReplicasOnDeadBrokers = replicasOnDeadBrokers.map(_._1.topic).toSet + val topicsWithReplicasOnDeadBrokers = controllerContext.partitionReplicaAssignment.filter { case(partition, replicas) => + replicas.exists(r => !controllerContext.liveBrokerIds.contains(r)) }.keySet.map(_.topic) val topicsForWhichPartitionReassignmentIsInProgress = controllerContext.partitionsUndergoingPreferredReplicaElection.map(_.topic) val topicsForWhichPreferredReplicaElectionIsInProgress = controllerContext.partitionsBeingReassigned.keySet.map(_.topic) - val haltedTopicsForDeletion = topicsWithReplicasOnDeadBrokers | topicsForWhichPartitionReassignmentIsInProgress | + val topicsIneligibleForDeletion = topicsWithReplicasOnDeadBrokers | topicsForWhichPartitionReassignmentIsInProgress | topicsForWhichPreferredReplicaElectionIsInProgress info("List of topics to be deleted: %s".format(topicsQueuedForDeletion.mkString(","))) - info("List of topics halted for deletion: %s".format(haltedTopicsForDeletion.mkString(","))) + info("List of topics ineligible for deletion: %s".format(topicsIneligibleForDeletion.mkString(","))) // initialize the topic deletion manager - deleteTopicManager = new TopicDeletionManager(this, topicsQueuedForDeletion, haltedTopicsForDeletion) + deleteTopicManager = new 
TopicDeletionManager(this, topicsQueuedForDeletion, topicsIneligibleForDeletion) } private def maybeTriggerPartitionReassignment() { @@ -873,15 +886,37 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg } private def registerReassignedPartitionsListener() = { - zkClient.subscribeDataChanges(ZkUtils.ReassignPartitionsPath, new PartitionsReassignedListener(this)) + zkClient.subscribeDataChanges(ZkUtils.ReassignPartitionsPath, partitionReassignedListener) + } + + private def deregisterReassignedPartitionsListener() = { + zkClient.unsubscribeDataChanges(ZkUtils.ReassignPartitionsPath, partitionReassignedListener) } private def registerPreferredReplicaElectionListener() { - zkClient.subscribeDataChanges(ZkUtils.PreferredReplicaLeaderElectionPath, new PreferredReplicaElectionListener(this)) + zkClient.subscribeDataChanges(ZkUtils.PreferredReplicaLeaderElectionPath, preferredReplicaElectionListener) } - private def registerControllerChangedListener() { - zkClient.subscribeDataChanges(ZkUtils.ControllerEpochPath, new ControllerEpochListener(this)) + private def deregisterPreferredReplicaElectionListener() { + zkClient.unsubscribeDataChanges(ZkUtils.PreferredReplicaLeaderElectionPath, preferredReplicaElectionListener) + } + + private def deregisterReassignedPartitionsIsrChangeListeners() { + controllerContext.partitionsBeingReassigned.foreach { + case (topicAndPartition, reassignedPartitionsContext) => + val zkPartitionPath = ZkUtils.getTopicPartitionLeaderAndIsrPath(topicAndPartition.topic, topicAndPartition.partition) + zkClient.unsubscribeDataChanges(zkPartitionPath, reassignedPartitionsContext.isrChangeListener) + } + } + + private def readControllerEpochFromZookeeper() { + // initialize the controller epoch and zk version by reading from zookeeper + if(ZkUtils.pathExists(controllerContext.zkClient, ZkUtils.ControllerEpochPath)) { + val epochData = ZkUtils.readData(controllerContext.zkClient, ZkUtils.ControllerEpochPath) + controllerContext.epoch = epochData._1.toInt + controllerContext.epochZkVersion = epochData._2.getVersion + info("Initialized controller epoch to %d and zk version %d".format(controllerContext.epoch, controllerContext.epochZkVersion)) + } } def removePartitionFromReassignedPartitions(topicAndPartition: TopicAndPartition) { @@ -913,7 +948,8 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg } } - def removePartitionsFromPreferredReplicaElection(partitionsToBeRemoved: Set[TopicAndPartition]) { + def removePartitionsFromPreferredReplicaElection(partitionsToBeRemoved: Set[TopicAndPartition], + isTriggeredByAutoRebalance : Boolean) { for(partition <- partitionsToBeRemoved) { // check the status val currentLeader = controllerContext.partitionLeadershipInfo(partition).leaderAndIsr.leader @@ -924,7 +960,8 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg warn("Partition %s failed to complete preferred replica leader election. 
Leader is %d".format(partition, currentLeader)) } } - ZkUtils.deletePath(zkClient, ZkUtils.PreferredReplicaLeaderElectionPath) + if (!isTriggeredByAutoRebalance) + ZkUtils.deletePath(zkClient, ZkUtils.PreferredReplicaLeaderElectionPath) controllerContext.partitionsUndergoingPreferredReplicaElection --= partitionsToBeRemoved } @@ -933,9 +970,9 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg * metadata requests * @param brokers The brokers that the update metadata request should be sent to */ - def sendUpdateMetadataRequest(brokers: Seq[Int]) { + def sendUpdateMetadataRequest(brokers: Seq[Int], partitions: Set[TopicAndPartition] = Set.empty[TopicAndPartition]) { brokerRequestBatch.newBatch() - brokerRequestBatch.addUpdateMetadataRequestForBrokers(brokers) + brokerRequestBatch.addUpdateMetadataRequestForBrokers(brokers, partitions) brokerRequestBatch.sendRequestsToBrokers(epoch, controllerContext.correlationId.getAndIncrement) } @@ -956,7 +993,7 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg var zkWriteCompleteOrUnnecessary = false while (!zkWriteCompleteOrUnnecessary) { // refresh leader and isr from zookeeper again - val leaderIsrAndEpochOpt = ZkUtils.getLeaderIsrAndEpochForPartition(zkClient, topic, partition) + val leaderIsrAndEpochOpt = ReplicationUtils.getLeaderIsrAndEpochForPartition(zkClient, topic, partition) zkWriteCompleteOrUnnecessary = leaderIsrAndEpochOpt match { case Some(leaderIsrAndEpoch) => // increment the leader epoch even if the ISR changes val leaderAndIsr = leaderIsrAndEpoch.leaderAndIsr @@ -967,17 +1004,25 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg "controller was elected with epoch %d. Aborting state change by this controller".format(controllerEpoch)) if (leaderAndIsr.isr.contains(replicaId)) { // if the replica to be removed from the ISR is also the leader, set the new leader value to -1 - val newLeader = if(replicaId == leaderAndIsr.leader) -1 else leaderAndIsr.leader + val newLeader = if (replicaId == leaderAndIsr.leader) LeaderAndIsr.NoLeader else leaderAndIsr.leader + var newIsr = leaderAndIsr.isr.filter(b => b != replicaId) + + // if the replica to be removed from the ISR is the last surviving member of the ISR and unclean leader election + // is disallowed for the corresponding topic, then we must preserve the ISR membership so that the replica can + // eventually be restored as the leader. 
+ if (newIsr.isEmpty && !LogConfig.fromProps(config.props.props, AdminUtils.fetchTopicConfig(zkClient, + topicAndPartition.topic)).uncleanLeaderElectionEnable) { + info("Retaining last ISR %d of partition %s since unclean leader election is disabled".format(replicaId, topicAndPartition)) + newIsr = leaderAndIsr.isr + } + val newLeaderAndIsr = new LeaderAndIsr(newLeader, leaderAndIsr.leaderEpoch + 1, - leaderAndIsr.isr.filter(b => b != replicaId), leaderAndIsr.zkVersion + 1) + newIsr, leaderAndIsr.zkVersion + 1) // update the new leadership decision in zookeeper or retry - val (updateSucceeded, newVersion) = ZkUtils.conditionalUpdatePersistentPath( - zkClient, - ZkUtils.getTopicPartitionLeaderAndIsrPath(topic, partition), - ZkUtils.leaderAndIsrZkData(newLeaderAndIsr, epoch), - leaderAndIsr.zkVersion) - newLeaderAndIsr.zkVersion = newVersion + val (updateSucceeded, newVersion) = ReplicationUtils.updateLeaderAndIsr(zkClient, topic, partition, + newLeaderAndIsr, epoch, leaderAndIsr.zkVersion) + newLeaderAndIsr.zkVersion = newVersion finalLeaderIsrAndControllerEpoch = Some(LeaderIsrAndControllerEpoch(newLeaderAndIsr, epoch)) controllerContext.partitionLeadershipInfo.put(topicAndPartition, finalLeaderIsrAndControllerEpoch.get) if (updateSucceeded) @@ -1011,7 +1056,7 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg var zkWriteCompleteOrUnnecessary = false while (!zkWriteCompleteOrUnnecessary) { // refresh leader and isr from zookeeper again - val leaderIsrAndEpochOpt = ZkUtils.getLeaderIsrAndEpochForPartition(zkClient, topic, partition) + val leaderIsrAndEpochOpt = ReplicationUtils.getLeaderIsrAndEpochForPartition(zkClient, topic, partition) zkWriteCompleteOrUnnecessary = leaderIsrAndEpochOpt match { case Some(leaderIsrAndEpoch) => val leaderAndIsr = leaderIsrAndEpoch.leaderAndIsr @@ -1025,11 +1070,9 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg val newLeaderAndIsr = new LeaderAndIsr(leaderAndIsr.leader, leaderAndIsr.leaderEpoch + 1, leaderAndIsr.isr, leaderAndIsr.zkVersion + 1) // update the new leadership decision in zookeeper or retry - val (updateSucceeded, newVersion) = ZkUtils.conditionalUpdatePersistentPath( - zkClient, - ZkUtils.getTopicPartitionLeaderAndIsrPath(topic, partition), - ZkUtils.leaderAndIsrZkData(newLeaderAndIsr, epoch), - leaderAndIsr.zkVersion) + val (updateSucceeded, newVersion) = ReplicationUtils.updateLeaderAndIsr(zkClient, topic, + partition, newLeaderAndIsr, epoch, leaderAndIsr.zkVersion) + newLeaderAndIsr.zkVersion = newVersion finalLeaderIsrAndControllerEpoch = Some(LeaderIsrAndControllerEpoch(newLeaderAndIsr, epoch)) if (updateSucceeded) @@ -1089,6 +1132,7 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg topicsNotInPreferredReplica = topicAndPartitionsForBroker.filter { case(topicPartition, replicas) => { + controllerContext.partitionLeadershipInfo.contains(topicPartition) && controllerContext.partitionLeadershipInfo(topicPartition).leaderAndIsr.leader != leaderBroker } } @@ -1101,26 +1145,18 @@ class KafkaController(val config : KafkaConfig, zkClient: ZkClient) extends Logg // check ratio and if greater than desired ratio, trigger a rebalance for the topic partitions // that need to be on this broker if (imbalanceRatio > (config.leaderImbalancePerBrokerPercentage.toDouble / 100)) { - inLock(controllerContext.controllerLock) { - // do this check only if the broker is live and there are no partitions being reassigned currently - // and preferred replica 
election is not in progress - if (controllerContext.liveBrokerIds.contains(leaderBroker) && - controllerContext.partitionsBeingReassigned.size == 0 && - controllerContext.partitionsUndergoingPreferredReplicaElection.size == 0) { - val zkPath = ZkUtils.PreferredReplicaLeaderElectionPath - val partitionsList = topicsNotInPreferredReplica.keys.map(e => Map("topic" -> e.topic, "partition" -> e.partition)) - val jsonData = Json.encode(Map("version" -> 1, "partitions" -> partitionsList)) - try { - ZkUtils.createPersistentPath(zkClient, zkPath, jsonData) - info("Created preferred replica election path with %s".format(jsonData)) - } catch { - case e2: ZkNodeExistsException => - val partitionsUndergoingPreferredReplicaElection = - PreferredReplicaLeaderElectionCommand.parsePreferredReplicaElectionData(ZkUtils.readData(zkClient, zkPath)._1) - error("Preferred replica leader election currently in progress for " + - "%s. Aborting operation".format(partitionsUndergoingPreferredReplicaElection)); - case e3: Throwable => - error("Error while trying to auto rebalance topics %s".format(topicsNotInPreferredReplica.keys)) + topicsNotInPreferredReplica.foreach { + case(topicPartition, replicas) => { + inLock(controllerContext.controllerLock) { + // do this check only if the broker is live and there are no partitions being reassigned currently + // and preferred replica election is not in progress + if (controllerContext.liveBrokerIds.contains(leaderBroker) && + controllerContext.partitionsBeingReassigned.size == 0 && + controllerContext.partitionsUndergoingPreferredReplicaElection.size == 0 && + !deleteTopicManager.isTopicQueuedUpForDeletion(topicPartition.topic) && + controllerContext.allTopics.contains(topicPartition.topic)) { + onPreferredReplicaElection(Set(topicPartition), true) + } } } } @@ -1265,8 +1301,7 @@ class PreferredReplicaElectionListener(controller: KafkaController) extends IZkD error("Skipping preferred replica election for partitions %s since the respective topics are being deleted" .format(partitionsForTopicsToBeDeleted)) } - else - controller.onPreferredReplicaElection(partitions -- partitionsForTopicsToBeDeleted) + controller.onPreferredReplicaElection(partitions -- partitionsForTopicsToBeDeleted) } } @@ -1279,43 +1314,6 @@ class PreferredReplicaElectionListener(controller: KafkaController) extends IZkD } } -class ControllerEpochListener(controller: KafkaController) extends IZkDataListener with Logging { - this.logIdent = "[ControllerEpochListener on " + controller.config.brokerId + "]: " - val controllerContext = controller.controllerContext - readControllerEpochFromZookeeper() - - /** - * Invoked when a controller updates the epoch value - * @throws Exception On any error. - */ - @throws(classOf[Exception]) - def handleDataChange(dataPath: String, data: Object) { - debug("Controller epoch listener fired with new epoch " + data.toString) - inLock(controllerContext.controllerLock) { - // read the epoch path to get the zk version - readControllerEpochFromZookeeper() - } - } - - /** - * @throws Exception - * On any error. 
- */ - @throws(classOf[Exception]) - def handleDataDeleted(dataPath: String) { - } - - private def readControllerEpochFromZookeeper() { - // initialize the controller epoch and zk version by reading from zookeeper - if(ZkUtils.pathExists(controllerContext.zkClient, ZkUtils.ControllerEpochPath)) { - val epochData = ZkUtils.readData(controllerContext.zkClient, ZkUtils.ControllerEpochPath) - controllerContext.epoch = epochData._1.toInt - controllerContext.epochZkVersion = epochData._2.getVersion - info("Initialized controller epoch to %d and zk version %d".format(controllerContext.epoch, controllerContext.epochZkVersion)) - } - } -} - case class ReassignedPartitionsContext(var newReplicas: Seq[Int] = Seq.empty, var isrChangeListener: ReassignedPartitionsIsrChangeListener = null) diff --git a/core/src/main/scala/kafka/controller/PartitionLeaderSelector.scala b/core/src/main/scala/kafka/controller/PartitionLeaderSelector.scala index fa29bbe82db35..4a31c7271c2d0 100644 --- a/core/src/main/scala/kafka/controller/PartitionLeaderSelector.scala +++ b/core/src/main/scala/kafka/controller/PartitionLeaderSelector.scala @@ -16,9 +16,12 @@ */ package kafka.controller +import kafka.admin.AdminUtils import kafka.api.LeaderAndIsr +import kafka.log.LogConfig import kafka.utils.Logging import kafka.common.{LeaderElectionNotNeededException, TopicAndPartition, StateChangeFailedException, NoReplicaOnlineException} +import kafka.server.KafkaConfig trait PartitionLeaderSelector { @@ -37,12 +40,14 @@ trait PartitionLeaderSelector { * Select the new leader, new isr and receiving replicas (for the LeaderAndIsrRequest): * 1. If at least one broker from the isr is alive, it picks a broker from the live isr as the new leader and the live * isr as the new isr. - * 2. Else, it picks some alive broker from the assigned replica list as the new leader and the new isr. - * 3. If no broker in the assigned replica list is alive, it throws NoReplicaOnlineException + * 2. Else, if unclean leader election for the topic is disabled, it throws a NoReplicaOnlineException. + * 3. Else, it picks some alive broker from the assigned replica list as the new leader and the new isr. + * 4. If no broker in the assigned replica list is alive, it throws a NoReplicaOnlineException * Replicas to receive LeaderAndIsr request = live assigned replicas * Once the leader is successfully registered in zookeeper, it updates the allLeaders cache */ -class OfflinePartitionLeaderSelector(controllerContext: ControllerContext) extends PartitionLeaderSelector with Logging { +class OfflinePartitionLeaderSelector(controllerContext: ControllerContext, config: KafkaConfig) + extends PartitionLeaderSelector with Logging { this.logIdent = "[OfflinePartitionLeaderSelector]: " def selectLeader(topicAndPartition: TopicAndPartition, currentLeaderAndIsr: LeaderAndIsr): (LeaderAndIsr, Seq[Int]) = { @@ -54,6 +59,15 @@ class OfflinePartitionLeaderSelector(controllerContext: ControllerContext) exten val currentLeaderIsrZkPathVersion = currentLeaderAndIsr.zkVersion val newLeaderAndIsr = liveBrokersInIsr.isEmpty match { case true => + // Prior to electing an unclean (i.e. non-ISR) leader, ensure that doing so is not disallowed by the configuration + // for unclean leader election. + if (!LogConfig.fromProps(config.props.props, AdminUtils.fetchTopicConfig(controllerContext.zkClient, + topicAndPartition.topic)).uncleanLeaderElectionEnable) { + throw new NoReplicaOnlineException(("No broker in ISR for partition " + + "%s is alive. 
Live brokers are: [%s],".format(topicAndPartition, controllerContext.liveBrokerIds)) + + " ISR brokers are: [%s]".format(currentLeaderAndIsr.isr.mkString(","))) + } + debug("No broker in ISR is alive for %s. Pick the leader from the alive assigned replicas: %s" .format(topicAndPartition, liveAssignedReplicas.mkString(","))) liveAssignedReplicas.isEmpty match { @@ -69,7 +83,8 @@ class OfflinePartitionLeaderSelector(controllerContext: ControllerContext) exten new LeaderAndIsr(newLeader, currentLeaderEpoch + 1, List(newLeader), currentLeaderIsrZkPathVersion + 1) } case false => - val newLeader = liveBrokersInIsr.head + val liveReplicasInIsr = liveAssignedReplicas.filter(r => liveBrokersInIsr.contains(r)) + val newLeader = liveReplicasInIsr.head debug("Some broker in ISR is alive for %s. Select %d from ISR %s to be the leader." .format(topicAndPartition, newLeader, liveBrokersInIsr.mkString(","))) new LeaderAndIsr(newLeader, currentLeaderEpoch + 1, liveBrokersInIsr.toList, currentLeaderIsrZkPathVersion + 1) @@ -77,7 +92,7 @@ class OfflinePartitionLeaderSelector(controllerContext: ControllerContext) exten info("Selected new leader and ISR %s for offline partition %s".format(newLeaderAndIsr.toString(), topicAndPartition)) (newLeaderAndIsr, liveAssignedReplicas) case None => - throw new NoReplicaOnlineException("Partition %s doesn't have".format(topicAndPartition) + "replicas assigned to it") + throw new NoReplicaOnlineException("Partition %s doesn't have replicas assigned to it".format(topicAndPartition)) } } } @@ -196,4 +211,4 @@ class NoOpLeaderSelector(controllerContext: ControllerContext) extends Partition warn("I should never have been asked to perform leader election, returning the current LeaderAndIsr and replica assignment.") (currentLeaderAndIsr, controllerContext.partitionReplicaAssignment(topicAndPartition)) } -} \ No newline at end of file +} diff --git a/core/src/main/scala/kafka/controller/PartitionStateMachine.scala b/core/src/main/scala/kafka/controller/PartitionStateMachine.scala index 57c96b5539f20..2f0694bc5cdfc 100644 --- a/core/src/main/scala/kafka/controller/PartitionStateMachine.scala +++ b/core/src/main/scala/kafka/controller/PartitionStateMachine.scala @@ -22,7 +22,7 @@ import collection.mutable.Buffer import java.util.concurrent.atomic.AtomicBoolean import kafka.api.LeaderAndIsr import kafka.common.{LeaderElectionNotNeededException, TopicAndPartition, StateChangeFailedException, NoReplicaOnlineException} -import kafka.utils.{Logging, ZkUtils} +import kafka.utils.{Logging, ZkUtils, ReplicationUtils} import org.I0Itec.zkclient.{IZkDataListener, IZkChildListener} import org.I0Itec.zkclient.exception.ZkNodeExistsException import org.apache.log4j.Logger @@ -45,12 +45,16 @@ class PartitionStateMachine(controller: KafkaController) extends Logging { private val controllerContext = controller.controllerContext private val controllerId = controller.config.brokerId private val zkClient = controllerContext.zkClient - var partitionState: mutable.Map[TopicAndPartition, PartitionState] = mutable.Map.empty - val brokerRequestBatch = new ControllerBrokerRequestBatch(controller) + private val partitionState: mutable.Map[TopicAndPartition, PartitionState] = mutable.Map.empty + private val brokerRequestBatch = new ControllerBrokerRequestBatch(controller) private val hasStarted = new AtomicBoolean(false) private val noOpPartitionLeaderSelector = new NoOpLeaderSelector(controllerContext) + private val topicChangeListener = new TopicChangeListener() + private val deleteTopicsListener = new 
DeleteTopicsListener() + private val addPartitionsListener: mutable.Map[String, AddPartitionsListener] = mutable.Map.empty + private val stateChangeLogger = KafkaController.stateChangeLogger + this.logIdent = "[Partition state machine on Controller " + controllerId + "]: " - private val stateChangeLogger = Logger.getLogger(KafkaController.stateChangeLogger) /** * Invoked on successful controller election. First registers a topic change listener since that triggers all @@ -60,24 +64,45 @@ class PartitionStateMachine(controller: KafkaController) extends Logging { def startup() { // initialize partition state initializePartitionState() + // set started flag hasStarted.set(true) // try to move partitions to online state triggerOnlinePartitionStateChange() + info("Started partition state machine with initial state -> " + partitionState.toString()) } // register topic and partition change listeners def registerListeners() { registerTopicChangeListener() - registerDeleteTopicListener() + if(controller.config.deleteTopicEnable) + registerDeleteTopicListener() + } + + // de-register topic and partition change listeners + def deregisterListeners() { + deregisterTopicChangeListener() + addPartitionsListener.foreach { + case (topic, listener) => + zkClient.unsubscribeDataChanges(ZkUtils.getTopicPath(topic), listener) + } + addPartitionsListener.clear() + if(controller.config.deleteTopicEnable) + deregisterDeleteTopicListener() } /** * Invoked on controller shutdown. */ def shutdown() { + // reset started flag hasStarted.set(false) + // clear partition state partitionState.clear() + // de-register all ZK listeners + deregisterListeners() + + info("Stopped partition state machine") } /** @@ -167,8 +192,9 @@ class PartitionStateMachine(controller: KafkaController) extends Logging { assignReplicasToPartitions(topic, partition) partitionState.put(topicAndPartition, NewPartition) val assignedReplicas = controllerContext.partitionReplicaAssignment(topicAndPartition).mkString(",") - stateChangeLogger.trace("Controller %d epoch %d changed partition %s state from NotExists to New with assigned replicas %s" - .format(controllerId, controller.epoch, topicAndPartition, assignedReplicas)) + stateChangeLogger.trace("Controller %d epoch %d changed partition %s state from %s to %s with assigned replicas %s" + .format(controllerId, controller.epoch, topicAndPartition, currState, targetState, + assignedReplicas)) // post: partition has been assigned replicas case OnlinePartition => assertValidPreviousStates(topicAndPartition, List(NewPartition, OnlinePartition, OfflinePartition), OnlinePartition) @@ -184,22 +210,22 @@ class PartitionStateMachine(controller: KafkaController) extends Logging { } partitionState.put(topicAndPartition, OnlinePartition) val leader = controllerContext.partitionLeadershipInfo(topicAndPartition).leaderAndIsr.leader - stateChangeLogger.trace("Controller %d epoch %d changed partition %s from %s to OnlinePartition with leader %d" - .format(controllerId, controller.epoch, topicAndPartition, partitionState(topicAndPartition), leader)) + stateChangeLogger.trace("Controller %d epoch %d changed partition %s from %s to %s with leader %d" + .format(controllerId, controller.epoch, topicAndPartition, currState, targetState, leader)) // post: partition has a leader case OfflinePartition => // pre: partition should be in New or Online state assertValidPreviousStates(topicAndPartition, List(NewPartition, OnlinePartition, OfflinePartition), OfflinePartition) // should be called when the leader for a partition 
is no longer alive - stateChangeLogger.trace("Controller %d epoch %d changed partition %s state from Online to Offline" - .format(controllerId, controller.epoch, topicAndPartition)) + stateChangeLogger.trace("Controller %d epoch %d changed partition %s state from %s to %s" + .format(controllerId, controller.epoch, topicAndPartition, currState, targetState)) partitionState.put(topicAndPartition, OfflinePartition) // post: partition has no alive leader case NonExistentPartition => // pre: partition should be in Offline state assertValidPreviousStates(topicAndPartition, List(OfflinePartition), NonExistentPartition) - stateChangeLogger.trace("Controller %d epoch %d changed partition %s state from Offline to NotExists" - .format(controllerId, controller.epoch, topicAndPartition)) + stateChangeLogger.trace("Controller %d epoch %d changed partition %s state from %s to %s" + .format(controllerId, controller.epoch, topicAndPartition, currState, targetState)) partitionState.put(topicAndPartition, NonExistentPartition) // post: partition state is deleted from all brokers and zookeeper } @@ -288,7 +314,7 @@ class PartitionStateMachine(controller: KafkaController) extends Logging { } catch { case e: ZkNodeExistsException => // read the controller epoch - val leaderIsrAndEpoch = ZkUtils.getLeaderIsrAndEpochForPartition(zkClient, topicAndPartition.topic, + val leaderIsrAndEpoch = ReplicationUtils.getLeaderIsrAndEpochForPartition(zkClient, topicAndPartition.topic, topicAndPartition.partition).get val failMsg = ("encountered error while changing partition %s's state from New to Online since LeaderAndIsr path already " + "exists with value %s and controller epoch %d") @@ -329,9 +355,8 @@ class PartitionStateMachine(controller: KafkaController) extends Logging { } // elect new leader or throw exception val (leaderAndIsr, replicas) = leaderSelector.selectLeader(topicAndPartition, currentLeaderAndIsr) - val (updateSucceeded, newVersion) = ZkUtils.conditionalUpdatePersistentPath(zkClient, - ZkUtils.getTopicPartitionLeaderAndIsrPath(topic, partition), - ZkUtils.leaderAndIsrZkData(leaderAndIsr, controller.epoch), currentLeaderAndIsr.zkVersion) + val (updateSucceeded, newVersion) = ReplicationUtils.updateLeaderAndIsr(zkClient, topic, partition, + leaderAndIsr, controller.epoch, currentLeaderAndIsr.zkVersion) newLeaderAndIsr = leaderAndIsr newLeaderAndIsr.zkVersion = newVersion zookeeperPathUpdateSucceeded = updateSucceeded @@ -358,20 +383,34 @@ class PartitionStateMachine(controller: KafkaController) extends Logging { } private def registerTopicChangeListener() = { - zkClient.subscribeChildChanges(ZkUtils.BrokerTopicsPath, new TopicChangeListener()) + zkClient.subscribeChildChanges(ZkUtils.BrokerTopicsPath, topicChangeListener) + } + + private def deregisterTopicChangeListener() = { + zkClient.unsubscribeChildChanges(ZkUtils.BrokerTopicsPath, topicChangeListener) } def registerPartitionChangeListener(topic: String) = { - zkClient.subscribeDataChanges(ZkUtils.getTopicPath(topic), new AddPartitionsListener(topic)) + addPartitionsListener.put(topic, new AddPartitionsListener(topic)) + zkClient.subscribeDataChanges(ZkUtils.getTopicPath(topic), addPartitionsListener(topic)) + } + + def deregisterPartitionChangeListener(topic: String) = { + zkClient.unsubscribeDataChanges(ZkUtils.getTopicPath(topic), addPartitionsListener(topic)) + addPartitionsListener.remove(topic) } private def registerDeleteTopicListener() = { - zkClient.subscribeChildChanges(ZkUtils.DeleteTopicsPath, new DeleteTopicsListener()) + 
zkClient.subscribeChildChanges(ZkUtils.DeleteTopicsPath, deleteTopicsListener) + } + + private def deregisterDeleteTopicListener() = { + zkClient.unsubscribeChildChanges(ZkUtils.DeleteTopicsPath, deleteTopicsListener) } private def getLeaderIsrAndEpochOrThrowException(topic: String, partition: Int): LeaderIsrAndControllerEpoch = { val topicAndPartition = TopicAndPartition(topic, partition) - ZkUtils.getLeaderIsrAndEpochForPartition(zkClient, topic, partition) match { + ReplicationUtils.getLeaderIsrAndEpochForPartition(zkClient, topic, partition) match { case Some(currentLeaderIsrAndEpoch) => currentLeaderIsrAndEpoch case None => val failMsg = "LeaderAndIsr information doesn't exist for partition %s in %s state" @@ -438,22 +477,24 @@ class PartitionStateMachine(controller: KafkaController) extends Logging { } debug("Delete topics listener fired for topics %s to be deleted".format(topicsToBeDeleted.mkString(","))) val nonExistentTopics = topicsToBeDeleted.filter(t => !controllerContext.allTopics.contains(t)) - if(nonExistentTopics.size > 0) + if(nonExistentTopics.size > 0) { warn("Ignoring request to delete non-existing topics " + nonExistentTopics.mkString(",")) + nonExistentTopics.foreach(topic => ZkUtils.deletePathRecursive(zkClient, ZkUtils.getDeleteTopicPath(topic))) + } topicsToBeDeleted --= nonExistentTopics if(topicsToBeDeleted.size > 0) { info("Starting topic deletion for topics " + topicsToBeDeleted.mkString(",")) - // add topic to deletion list - controller.deleteTopicManager.enqueueTopicsForDeletion(topicsToBeDeleted) - // halt if other state changes are in progress + // mark topic ineligible for deletion if other state changes are in progress topicsToBeDeleted.foreach { topic => val preferredReplicaElectionInProgress = controllerContext.partitionsUndergoingPreferredReplicaElection.map(_.topic).contains(topic) val partitionReassignmentInProgress = controllerContext.partitionsBeingReassigned.keySet.map(_.topic).contains(topic) if(preferredReplicaElectionInProgress || partitionReassignmentInProgress) - controller.deleteTopicManager.haltTopicDeletion(Set(topic)) + controller.deleteTopicManager.markTopicIneligibleForDeletion(Set(topic)) } + // add topic to deletion list + controller.deleteTopicManager.enqueueTopicsForDeletion(topicsToBeDeleted) } } } @@ -507,5 +548,3 @@ case object NewPartition extends PartitionState { val state: Byte = 0 } case object OnlinePartition extends PartitionState { val state: Byte = 1 } case object OfflinePartition extends PartitionState { val state: Byte = 2 } case object NonExistentPartition extends PartitionState { val state: Byte = 3 } - - diff --git a/core/src/main/scala/kafka/controller/ReplicaStateMachine.scala b/core/src/main/scala/kafka/controller/ReplicaStateMachine.scala index 613aec6f40db5..3e87e1d36f87b 100644 --- a/core/src/main/scala/kafka/controller/ReplicaStateMachine.scala +++ b/core/src/main/scala/kafka/controller/ReplicaStateMachine.scala @@ -20,7 +20,7 @@ import collection._ import collection.JavaConversions._ import java.util.concurrent.atomic.AtomicBoolean import kafka.common.{TopicAndPartition, StateChangeFailedException} -import kafka.utils.{ZkUtils, Logging} +import kafka.utils.{ZkUtils, ReplicationUtils, Logging} import org.I0Itec.zkclient.IZkChildListener import org.apache.log4j.Logger import kafka.controller.Callbacks._ @@ -40,7 +40,7 @@ import kafka.utils.Utils._ * 4. ReplicaDeletionStarted: If replica deletion starts, it is moved to this state. Valid previous state is OfflineReplica * 5. 
ReplicaDeletionSuccessful: If replica responds with no error code in response to a delete replica request, it is * moved to this state. Valid previous state is ReplicaDeletionStarted - * 6. ReplicaDeletionFailed: If replica deletion fails, it is moved to this state. Valid previous state is ReplicaDeletionStarted + * 6. ReplicaDeletionIneligible: If replica deletion fails, it is moved to this state. Valid previous state is ReplicaDeletionStarted * 7. NonExistentReplica: If a replica is deleted successfully, it is moved to this state. Valid previous state is * ReplicaDeletionSuccessful */ @@ -48,11 +48,14 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { private val controllerContext = controller.controllerContext private val controllerId = controller.config.brokerId private val zkClient = controllerContext.zkClient - var replicaState: mutable.Map[PartitionAndReplica, ReplicaState] = mutable.Map.empty - val brokerRequestBatch = new ControllerBrokerRequestBatch(controller) + private val replicaState: mutable.Map[PartitionAndReplica, ReplicaState] = mutable.Map.empty + private val brokerChangeListener = new BrokerChangeListener() + private val brokerRequestBatch = new ControllerBrokerRequestBatch(controller) private val hasStarted = new AtomicBoolean(false) + private val stateChangeLogger = KafkaController.stateChangeLogger + this.logIdent = "[Replica state machine on controller " + controller.config.brokerId + "]: " - private val stateChangeLogger = Logger.getLogger(KafkaController.stateChangeLogger) + /** * Invoked on successful controller election. First registers a broker change listener since that triggers all @@ -62,23 +65,38 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { def startup() { // initialize replica state initializeReplicaState() + // set started flag hasStarted.set(true) // move all Online replicas to Online handleStateChanges(controllerContext.allLiveReplicas(), OnlineReplica) + info("Started replica state machine with initial state -> " + replicaState.toString()) } - // register broker change listener + // register ZK listeners of the replica state machine def registerListeners() { + // register broker change listener registerBrokerChangeListener() } + // de-register ZK listeners of the replica state machine + def deregisterListeners() { + // de-register broker change listener + deregisterBrokerChangeListener() + } + /** * Invoked on controller shutdown. */ def shutdown() { + // reset started flag hasStarted.set(false) + // reset replica state replicaState.clear() + // de-register all ZK listeners + deregisterListeners() + + info("Stopped replica state machine") } /** @@ -115,7 +133,7 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { * --send LeaderAndIsr request with current leader and isr to the new replica and UpdateMetadata request for the * partition to every live broker * - * NewReplica,OnlineReplica,OfflineReplica,ReplicaDeletionFailed -> OfflineReplica + * NewReplica,OnlineReplica,OfflineReplica,ReplicaDeletionIneligible -> OfflineReplica * --send StopReplicaRequest to the replica (w/o deletion) * --remove this replica from the isr and send LeaderAndIsr request (with new isr) to the leader replica and * UpdateMetadata request for the partition to every live broker. 
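// Editor's note: a compressed sketch (illustration only, not part of this patch) of the
// replica-state transition rules documented in the comment above, after ReplicaDeletionFailed
// was renamed to ReplicaDeletionIneligible. `validPreviousStates` and `checkTransition` are
// hypothetical helpers; the real machine enforces the same rules via assertValidPreviousStates.
object ReplicaStateSketch {
  sealed trait State
  case object NonExistentReplica extends State
  case object NewReplica extends State
  case object OnlineReplica extends State
  case object OfflineReplica extends State
  case object ReplicaDeletionStarted extends State
  case object ReplicaDeletionSuccessful extends State
  case object ReplicaDeletionIneligible extends State

  // target state -> states a replica may legally come from
  val validPreviousStates: Map[State, Set[State]] = Map(
    NewReplica                -> Set(NonExistentReplica),
    OnlineReplica             -> Set(NewReplica, OnlineReplica, OfflineReplica, ReplicaDeletionIneligible),
    OfflineReplica            -> Set(NewReplica, OnlineReplica, OfflineReplica, ReplicaDeletionIneligible),
    ReplicaDeletionStarted    -> Set(OfflineReplica),
    ReplicaDeletionSuccessful -> Set(ReplicaDeletionStarted),
    ReplicaDeletionIneligible -> Set(ReplicaDeletionStarted),
    NonExistentReplica        -> Set(ReplicaDeletionSuccessful)
  )

  def checkTransition(current: State, target: State): Boolean =
    validPreviousStates(target).contains(current)
}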
@@ -126,7 +144,7 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { * ReplicaDeletionStarted -> ReplicaDeletionSuccessful * -- mark the state of the replica in the state machine * - * ReplicaDeletionStarted -> ReplicaDeletionFailed + * ReplicaDeletionStarted -> ReplicaDeletionIneligible * -- mark the state of the replica in the state machine * * ReplicaDeletionSuccessful -> NonExistentReplica @@ -146,14 +164,14 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { throw new StateChangeFailedException(("Controller %d epoch %d initiated state change of replica %d for partition %s " + "to %s failed because replica state machine has not started") .format(controllerId, controller.epoch, replicaId, topicAndPartition, targetState)) + val currState = replicaState.getOrElseUpdate(partitionAndReplica, NonExistentReplica) try { - replicaState.getOrElseUpdate(partitionAndReplica, NonExistentReplica) val replicaAssignment = controllerContext.partitionReplicaAssignment(topicAndPartition) targetState match { case NewReplica => assertValidPreviousStates(partitionAndReplica, List(NonExistentReplica), targetState) // start replica as a follower to the current leader for its partition - val leaderIsrAndControllerEpochOpt = ZkUtils.getLeaderIsrAndEpochForPartition(zkClient, topic, partition) + val leaderIsrAndControllerEpochOpt = ReplicationUtils.getLeaderIsrAndEpochForPartition(zkClient, topic, partition) leaderIsrAndControllerEpochOpt match { case Some(leaderIsrAndControllerEpoch) => if(leaderIsrAndControllerEpoch.leaderAndIsr.leader == replicaId) @@ -165,45 +183,47 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { case None => // new leader request will be sent to this replica when one gets elected } replicaState.put(partitionAndReplica, NewReplica) - stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s to NewReplica" - .format(controllerId, controller.epoch, replicaId, topicAndPartition)) + stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s from %s to %s" + .format(controllerId, controller.epoch, replicaId, topicAndPartition, currState, + targetState)) case ReplicaDeletionStarted => assertValidPreviousStates(partitionAndReplica, List(OfflineReplica), targetState) replicaState.put(partitionAndReplica, ReplicaDeletionStarted) // send stop replica command brokerRequestBatch.addStopReplicaRequestForBrokers(List(replicaId), topic, partition, deletePartition = true, callbacks.stopReplicaResponseCallback) - stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s to ReplicaDeletionStarted" - .format(controllerId, controller.epoch, replicaId, topicAndPartition)) - case ReplicaDeletionFailed => + stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s from %s to %s" + .format(controllerId, controller.epoch, replicaId, topicAndPartition, currState, targetState)) + case ReplicaDeletionIneligible => assertValidPreviousStates(partitionAndReplica, List(ReplicaDeletionStarted), targetState) - replicaState.put(partitionAndReplica, ReplicaDeletionFailed) - stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s to ReplicaDeletionFailed" - .format(controllerId, controller.epoch, replicaId, topicAndPartition)) + replicaState.put(partitionAndReplica, ReplicaDeletionIneligible) + stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for 
partition %s from %s to %s" + .format(controllerId, controller.epoch, replicaId, topicAndPartition, currState, targetState)) case ReplicaDeletionSuccessful => assertValidPreviousStates(partitionAndReplica, List(ReplicaDeletionStarted), targetState) replicaState.put(partitionAndReplica, ReplicaDeletionSuccessful) - stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s to ReplicaDeletionSuccessful" - .format(controllerId, controller.epoch, replicaId, topicAndPartition)) + stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s from %s to %s" + .format(controllerId, controller.epoch, replicaId, topicAndPartition, currState, targetState)) case NonExistentReplica => assertValidPreviousStates(partitionAndReplica, List(ReplicaDeletionSuccessful), targetState) // remove this replica from the assigned replicas list for its partition val currentAssignedReplicas = controllerContext.partitionReplicaAssignment(topicAndPartition) controllerContext.partitionReplicaAssignment.put(topicAndPartition, currentAssignedReplicas.filterNot(_ == replicaId)) replicaState.remove(partitionAndReplica) - stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s to NonExistentReplica" - .format(controllerId, controller.epoch, replicaId, topicAndPartition)) + stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s from %s to %s" + .format(controllerId, controller.epoch, replicaId, topicAndPartition, currState, targetState)) case OnlineReplica => assertValidPreviousStates(partitionAndReplica, - List(NewReplica, OnlineReplica, OfflineReplica, ReplicaDeletionFailed), targetState) + List(NewReplica, OnlineReplica, OfflineReplica, ReplicaDeletionIneligible), targetState) replicaState(partitionAndReplica) match { case NewReplica => // add this replica to the assigned replicas list for its partition val currentAssignedReplicas = controllerContext.partitionReplicaAssignment(topicAndPartition) if(!currentAssignedReplicas.contains(replicaId)) controllerContext.partitionReplicaAssignment.put(topicAndPartition, currentAssignedReplicas :+ replicaId) - stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s to OnlineReplica" - .format(controllerId, controller.epoch, replicaId, topicAndPartition)) + stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s from %s to %s" + .format(controllerId, controller.epoch, replicaId, topicAndPartition, currState, + targetState)) case _ => // check if the leader for this partition ever existed controllerContext.partitionLeadershipInfo.get(topicAndPartition) match { @@ -211,8 +231,8 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { brokerRequestBatch.addLeaderAndIsrRequestForBrokers(List(replicaId), topic, partition, leaderIsrAndControllerEpoch, replicaAssignment) replicaState.put(partitionAndReplica, OnlineReplica) - stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s to OnlineReplica" - .format(controllerId, controller.epoch, replicaId, topicAndPartition)) + stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s from %s to %s" + .format(controllerId, controller.epoch, replicaId, topicAndPartition, currState, targetState)) case None => // that means the partition was never in OnlinePartition state, this means the broker never // started a log for that partition and does not have 
a high watermark value for this partition } @@ -220,7 +240,7 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { replicaState.put(partitionAndReplica, OnlineReplica) case OfflineReplica => assertValidPreviousStates(partitionAndReplica, - List(NewReplica, OnlineReplica, OfflineReplica, ReplicaDeletionFailed), targetState) + List(NewReplica, OnlineReplica, OfflineReplica, ReplicaDeletionIneligible), targetState) // send stop replica command to the replica so that it stops fetching from the leader brokerRequestBatch.addStopReplicaRequestForBrokers(List(replicaId), topic, partition, deletePartition = false) // As an optimization, the controller removes dead replicas from the ISR @@ -229,12 +249,15 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { case Some(currLeaderIsrAndControllerEpoch) => controller.removeReplicaFromIsr(topic, partition, replicaId) match { case Some(updatedLeaderIsrAndControllerEpoch) => - // send the shrunk ISR state change request only to the leader - brokerRequestBatch.addLeaderAndIsrRequestForBrokers(List(updatedLeaderIsrAndControllerEpoch.leaderAndIsr.leader), - topic, partition, updatedLeaderIsrAndControllerEpoch, replicaAssignment) + // send the shrunk ISR state change request to all the remaining alive replicas of the partition. + val currentAssignedReplicas = controllerContext.partitionReplicaAssignment(topicAndPartition) + if (!controller.deleteTopicManager.isPartitionToBeDeleted(topicAndPartition)) { + brokerRequestBatch.addLeaderAndIsrRequestForBrokers(currentAssignedReplicas.filterNot(_ == replicaId), + topic, partition, updatedLeaderIsrAndControllerEpoch, replicaAssignment) + } replicaState.put(partitionAndReplica, OfflineReplica) - stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s to OfflineReplica" - .format(controllerId, controller.epoch, replicaId, topicAndPartition)) + stateChangeLogger.trace("Controller %d epoch %d changed state of replica %d for partition %s from %s to %s" + .format(controllerId, controller.epoch, replicaId, topicAndPartition, currState, targetState)) false case None => true @@ -250,8 +273,8 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { } catch { case t: Throwable => - stateChangeLogger.error("Controller %d epoch %d initiated state change of replica %d for partition [%s,%d] to %s failed" - .format(controllerId, controller.epoch, replicaId, topic, partition, targetState), t) + stateChangeLogger.error("Controller %d epoch %d initiated state change of replica %d for partition [%s,%d] from %s to %s failed" + .format(controllerId, controller.epoch, replicaId, topic, partition, currState, targetState), t) } } @@ -272,8 +295,12 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { replicaState.filter(r => r._1.topic.equals(topic) && r._2 == state).keySet } + def isAnyReplicaInState(topic: String, state: ReplicaState): Boolean = { + replicaState.exists(r => r._1.topic.equals(topic) && r._2 == state) + } + def replicasInDeletionStates(topic: String): Set[PartitionAndReplica] = { - val deletionStates = Set(ReplicaDeletionStarted, ReplicaDeletionSuccessful, ReplicaDeletionFailed) + val deletionStates = Set(ReplicaDeletionStarted, ReplicaDeletionSuccessful, ReplicaDeletionIneligible) replicaState.filter(r => r._1.topic.equals(topic) && deletionStates.contains(r._2)).keySet } @@ -286,7 +313,11 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { } private def 
registerBrokerChangeListener() = { - zkClient.subscribeChildChanges(ZkUtils.BrokerIdsPath, new BrokerChangeListener()) + zkClient.subscribeChildChanges(ZkUtils.BrokerIdsPath, brokerChangeListener) + } + + private def deregisterBrokerChangeListener() = { + zkClient.unsubscribeChildChanges(ZkUtils.BrokerIdsPath, brokerChangeListener) } /** @@ -304,8 +335,8 @@ class ReplicaStateMachine(controller: KafkaController) extends Logging { case false => // mark replicas on dead brokers as failed for topic deletion, if they belong to a topic to be deleted. // This is required during controller failover since during controller failover a broker can go down, - // so the replicas on that broker should be moved to ReplicaDeletionFailed to be on the safer side. - replicaState.put(partitionAndReplica, ReplicaDeletionFailed) + // so the replicas on that broker should be moved to ReplicaDeletionIneligible to be on the safer side. + replicaState.put(partitionAndReplica, ReplicaDeletionIneligible) } } } @@ -356,7 +387,5 @@ case object OnlineReplica extends ReplicaState { val state: Byte = 2 } case object OfflineReplica extends ReplicaState { val state: Byte = 3 } case object ReplicaDeletionStarted extends ReplicaState { val state: Byte = 4} case object ReplicaDeletionSuccessful extends ReplicaState { val state: Byte = 5} -case object ReplicaDeletionFailed extends ReplicaState { val state: Byte = 6} +case object ReplicaDeletionIneligible extends ReplicaState { val state: Byte = 6} case object NonExistentReplica extends ReplicaState { val state: Byte = 7 } - - diff --git a/core/src/main/scala/kafka/controller/TopicDeletionManager.scala b/core/src/main/scala/kafka/controller/TopicDeletionManager.scala index 91a446ddc8aeb..e56f22d739906 100644 --- a/core/src/main/scala/kafka/controller/TopicDeletionManager.scala +++ b/core/src/main/scala/kafka/controller/TopicDeletionManager.scala @@ -22,6 +22,8 @@ import kafka.utils.Utils._ import collection.Set import kafka.common.{ErrorMapping, TopicAndPartition} import kafka.api.{StopReplicaResponse, RequestOrResponse} +import java.util.concurrent.locks.ReentrantLock +import java.util.concurrent.atomic.AtomicBoolean /** * This manages the state machine for topic deletion. @@ -30,8 +32,8 @@ import kafka.api.{StopReplicaResponse, RequestOrResponse} * 3. The controller has a background thread that handles topic deletion. The purpose of having this background thread * is to accommodate the TTL feature, when we have it. This thread is signaled whenever deletion for a topic needs to * be started or resumed. Currently, a topic's deletion can be started only by the onPartitionDeletion callback on the - * controller. In the future, it can be triggered based on the configured TTL for the topic. A topic's deletion will - * be halted in the following scenarios - + * controller. In the future, it can be triggered based on the configured TTL for the topic. A topic will be ineligible + * for deletion in the following scenarios - * 3.1 broker hosting one of the replicas for that topic goes down * 3.2 partition reassignment for partitions of that topic is in progress * 3.3 preferred replica election for partitions of that topic is in progress @@ -62,37 +64,51 @@ import kafka.api.{StopReplicaResponse, RequestOrResponse} * it marks the topic for deletion retry. 
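// Editor's note: a minimal sketch (illustration only, not part of this patch) of the dedicated
// wake-up mechanism this change gives the delete-topics thread: a ReentrantLock/Condition pair
// plus an AtomicBoolean flag, replacing the condition previously hung off the controller lock.
// `DeletionSignal`, `awaitWork` and `signalWork` are hypothetical names.
import java.util.concurrent.atomic.AtomicBoolean
import java.util.concurrent.locks.ReentrantLock

class DeletionSignal {
  private val lock = new ReentrantLock()
  private val cond = lock.newCondition()
  private val stateChanged = new AtomicBoolean(false)

  // worker side: block until someone signals new deletion work (or the caller stops running)
  def awaitWork(isRunning: () => Boolean): Unit = {
    lock.lock()
    try {
      // compareAndSet consumes the pending signal so one signal wakes exactly one pass
      while (isRunning() && !stateChanged.compareAndSet(true, false))
        cond.await()
    } finally lock.unlock()
  }

  // controller side: record that deletion state changed, then wake the worker
  def signalWork(): Unit = {
    stateChanged.set(true)
    lock.lock()
    try cond.signal()
    finally lock.unlock()
  }
}
// Because the flag is set before the condition is signalled, a signal that arrives while the
// worker is busy (not waiting) is not lost: the next call to awaitWork consumes it immediately.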
* @param controller * @param initialTopicsToBeDeleted The topics that are queued up for deletion in zookeeper at the time of controller failover - * @param initialHaltedTopicsForDeletion The topics for which deletion is halted due to any of the conditions mentioned in #3 above + * @param initialTopicsIneligibleForDeletion The topics ineligible for deletion due to any of the conditions mentioned in #3 above */ class TopicDeletionManager(controller: KafkaController, initialTopicsToBeDeleted: Set[String] = Set.empty, - initialHaltedTopicsForDeletion: Set[String] = Set.empty) extends Logging { + initialTopicsIneligibleForDeletion: Set[String] = Set.empty) extends Logging { + this.logIdent = "[Topic Deletion Manager " + controller.config.brokerId + "], " val controllerContext = controller.controllerContext val partitionStateMachine = controller.partitionStateMachine val replicaStateMachine = controller.replicaStateMachine - var topicsToBeDeleted: mutable.Set[String] = mutable.Set.empty[String] ++ initialTopicsToBeDeleted - var haltedTopicsForDeletion: mutable.Set[String] = mutable.Set.empty[String] ++ - (initialHaltedTopicsForDeletion & initialTopicsToBeDeleted) - val deleteTopicsCond = controllerContext.controllerLock.newCondition() - var deleteTopicStateChanged: Boolean = false + val topicsToBeDeleted: mutable.Set[String] = mutable.Set.empty[String] ++ initialTopicsToBeDeleted + val partitionsToBeDeleted: mutable.Set[TopicAndPartition] = topicsToBeDeleted.flatMap(controllerContext.partitionsForTopic) + val deleteLock = new ReentrantLock() + val topicsIneligibleForDeletion: mutable.Set[String] = mutable.Set.empty[String] ++ + (initialTopicsIneligibleForDeletion & initialTopicsToBeDeleted) + val deleteTopicsCond = deleteLock.newCondition() + val deleteTopicStateChanged: AtomicBoolean = new AtomicBoolean(false) var deleteTopicsThread: DeleteTopicsThread = null + val isDeleteTopicEnabled = controller.config.deleteTopicEnable /** * Invoked at the end of new controller initiation */ def start() { - deleteTopicsThread = new DeleteTopicsThread() - deleteTopicStateChanged = true - deleteTopicsThread.start() + if (isDeleteTopicEnabled) { + deleteTopicsThread = new DeleteTopicsThread() + if (topicsToBeDeleted.size > 0) + deleteTopicStateChanged.set(true) + deleteTopicsThread.start() + } } /** - * Invoked when the current controller resigns. At this time, all state for topic deletion should be cleared + * Invoked when the current controller resigns. At this time, all state for topic deletion should be cleared. 
*/ def shutdown() { - deleteTopicsThread.shutdown() - topicsToBeDeleted.clear() - haltedTopicsForDeletion.clear() + // Only allow one shutdown to go through + if (isDeleteTopicEnabled && deleteTopicsThread.initiateShutdown()) { + // Resume the topic deletion so it doesn't block on the condition + resumeTopicDeletionThread() + // Await delete topic thread to exit + deleteTopicsThread.awaitShutdown() + topicsToBeDeleted.clear() + partitionsToBeDeleted.clear() + topicsIneligibleForDeletion.clear() + } } /** @@ -102,8 +118,11 @@ class TopicDeletionManager(controller: KafkaController, * @param topics Topics that should be deleted */ def enqueueTopicsForDeletion(topics: Set[String]) { - topicsToBeDeleted ++= topics - resumeTopicDeletionThread() + if(isDeleteTopicEnabled) { + topicsToBeDeleted ++= topics + partitionsToBeDeleted ++= topics.flatMap(controllerContext.partitionsForTopic) + resumeTopicDeletionThread() + } } /** @@ -115,30 +134,34 @@ class TopicDeletionManager(controller: KafkaController, * @param topics Topics for which deletion can be resumed */ def resumeDeletionForTopics(topics: Set[String] = Set.empty) { - val topicsToResumeDeletion = topics & topicsToBeDeleted - if(topicsToResumeDeletion.size > 0) { - haltedTopicsForDeletion --= topicsToResumeDeletion - resumeTopicDeletionThread() + if(isDeleteTopicEnabled) { + val topicsToResumeDeletion = topics & topicsToBeDeleted + if(topicsToResumeDeletion.size > 0) { + topicsIneligibleForDeletion --= topicsToResumeDeletion + resumeTopicDeletionThread() + } } } /** * Invoked when a broker that hosts replicas for topics to be deleted goes down. Also invoked when the callback for * StopReplicaResponse receives an error code for the replicas of a topic to be deleted. As part of this, the replicas - * are moved from ReplicaDeletionStarted to ReplicaDeletionFailed state. Also, the topic is added to the list of topics - * for which deletion is halted until further notice. The delete topic thread is notified so it can retry topic deletion + * are moved from ReplicaDeletionStarted to ReplicaDeletionIneligible state. Also, the topic is added to the list of topics + * ineligible for deletion until further notice. The delete topic thread is notified so it can retry topic deletion * if it has received a response for all replicas of a topic to be deleted * @param replicas Replicas for which deletion has failed */ def failReplicaDeletion(replicas: Set[PartitionAndReplica]) { - val replicasThatFailedToDelete = replicas.filter(r => isTopicQueuedUpForDeletion(r.topic)) - if(replicasThatFailedToDelete.size > 0) { - val topics = replicasThatFailedToDelete.map(_.topic) - debug("Deletion failed for replicas %s. Halting deletion for topics %s" - .format(replicasThatFailedToDelete.mkString(","), topics)) - controller.replicaStateMachine.handleStateChanges(replicasThatFailedToDelete, ReplicaDeletionFailed) - haltTopicDeletion(topics) - resumeTopicDeletionThread() + if(isDeleteTopicEnabled) { + val replicasThatFailedToDelete = replicas.filter(r => isTopicQueuedUpForDeletion(r.topic)) + if(replicasThatFailedToDelete.size > 0) { + val topics = replicasThatFailedToDelete.map(_.topic) + debug("Deletion failed for replicas %s. 
Halting deletion for topics %s" + .format(replicasThatFailedToDelete.mkString(","), topics)) + controller.replicaStateMachine.handleStateChanges(replicasThatFailedToDelete, ReplicaDeletionIneligible) + markTopicIneligibleForDeletion(topics) + resumeTopicDeletionThread() + } } } @@ -147,25 +170,43 @@ class TopicDeletionManager(controller: KafkaController, * 1. replicas being down * 2. partition reassignment in progress for some partitions of the topic * 3. preferred replica election in progress for some partitions of the topic - * @param topics Topics for which deletion should be halted. No op if the topic is was not previously queued up for deletion + * @param topics Topics that should be marked ineligible for deletion. No op if the topic is was not previously queued up for deletion */ - def haltTopicDeletion(topics: Set[String]) { - val newTopicsToHaltDeletion = topicsToBeDeleted & topics - haltedTopicsForDeletion ++= newTopicsToHaltDeletion - if(newTopicsToHaltDeletion.size > 0) - info("Halted deletion of topics %s".format(newTopicsToHaltDeletion.mkString(","))) + def markTopicIneligibleForDeletion(topics: Set[String]) { + if(isDeleteTopicEnabled) { + val newTopicsToHaltDeletion = topicsToBeDeleted & topics + topicsIneligibleForDeletion ++= newTopicsToHaltDeletion + if(newTopicsToHaltDeletion.size > 0) + info("Halted deletion of topics %s".format(newTopicsToHaltDeletion.mkString(","))) + } } - def isTopicDeletionHalted(topic: String): Boolean = { - haltedTopicsForDeletion.contains(topic) + def isTopicIneligibleForDeletion(topic: String): Boolean = { + if(isDeleteTopicEnabled) { + topicsIneligibleForDeletion.contains(topic) + } else + true } def isTopicDeletionInProgress(topic: String): Boolean = { - controller.replicaStateMachine.isAtLeastOneReplicaInDeletionStartedState(topic) + if(isDeleteTopicEnabled) { + controller.replicaStateMachine.isAtLeastOneReplicaInDeletionStartedState(topic) + } else + false + } + + def isPartitionToBeDeleted(topicAndPartition: TopicAndPartition) = { + if(isDeleteTopicEnabled) { + partitionsToBeDeleted.contains(topicAndPartition) + } else + false } def isTopicQueuedUpForDeletion(topic: String): Boolean = { - topicsToBeDeleted.contains(topic) + if(isDeleteTopicEnabled) { + topicsToBeDeleted.contains(topic) + } else + false } /** @@ -173,19 +214,22 @@ class TopicDeletionManager(controller: KafkaController, * controllerLock should be acquired before invoking this API */ private def awaitTopicDeletionNotification() { - while(!deleteTopicStateChanged) { - info("Waiting for signal to start or continue topic deletion") - deleteTopicsCond.await() + inLock(deleteLock) { + while(deleteTopicsThread.isRunning.get() && !deleteTopicStateChanged.compareAndSet(true, false)) { + debug("Waiting for signal to start or continue topic deletion") + deleteTopicsCond.await() + } } - deleteTopicStateChanged = false } /** * Signals the delete-topic-thread to process topic deletion */ private def resumeTopicDeletionThread() { - deleteTopicStateChanged = true - deleteTopicsCond.signal() + deleteTopicStateChanged.set(true) + inLock(deleteLock) { + deleteTopicsCond.signal() + } } /** @@ -205,26 +249,31 @@ class TopicDeletionManager(controller: KafkaController, * Topic deletion can be retried if - * 1. Topic deletion is not already complete * 2. Topic deletion is currently not in progress for that topic - * 3. Topic deletion is currently halted for that topic + * 3. 
Topic is currently marked ineligible for deletion * @param topic Topic * @return Whether or not deletion can be retried for the topic */ private def isTopicEligibleForDeletion(topic: String): Boolean = { - topicsToBeDeleted.contains(topic) && (!isTopicDeletionInProgress(topic) && !isTopicDeletionHalted(topic)) + topicsToBeDeleted.contains(topic) && (!isTopicDeletionInProgress(topic) && !isTopicIneligibleForDeletion(topic)) } /** * If the topic is queued for deletion but deletion is not currently under progress, then deletion is retried for that topic - * To ensure a successful retry, reset states for respective replicas from ReplicaDeletionFailed to OfflineReplica state + * To ensure a successful retry, reset states for respective replicas from ReplicaDeletionIneligible to OfflineReplica state *@param topic Topic for which deletion should be retried */ private def markTopicForDeletionRetry(topic: String) { - // reset replica states from ReplicaDeletionFailed to OfflineReplica - val failedReplicas = controller.replicaStateMachine.replicasInState(topic, ReplicaDeletionFailed) + // reset replica states from ReplicaDeletionIneligible to OfflineReplica + val failedReplicas = controller.replicaStateMachine.replicasInState(topic, ReplicaDeletionIneligible) + info("Retrying delete topic for topic %s since replicas %s were not successfully deleted" + .format(topic, failedReplicas.mkString(","))) controller.replicaStateMachine.handleStateChanges(failedReplicas, OfflineReplica) } private def completeDeleteTopic(topic: String) { + // deregister partition change listener on the deleted topic. This is to prevent the partition change listener + // firing before the new topic listener when a deleted topic gets auto created + partitionStateMachine.deregisterPartitionChangeListener(topic) val replicasForDeletedTopic = controller.replicaStateMachine.replicasInState(topic, ReplicaDeletionSuccessful) // controller will remove this replica from the state machine as well as its partition assignment cache replicaStateMachine.handleStateChanges(replicasForDeletedTopic, NonExistentReplica) @@ -233,6 +282,7 @@ class TopicDeletionManager(controller: KafkaController, partitionStateMachine.handleStateChanges(partitionsForDeletedTopic, OfflinePartition) partitionStateMachine.handleStateChanges(partitionsForDeletedTopic, NonExistentPartition) topicsToBeDeleted -= topic + partitionsToBeDeleted.retain(_.topic != topic) controllerContext.zkClient.deleteRecursive(ZkUtils.getTopicPath(topic)) controllerContext.zkClient.deleteRecursive(ZkUtils.getTopicConfigPath(topic)) controllerContext.zkClient.delete(ZkUtils.getDeleteTopicPath(topic)) @@ -241,10 +291,16 @@ class TopicDeletionManager(controller: KafkaController, /** * This callback is invoked by the DeleteTopics thread with the list of topics to be deleted - * It invokes the delete partition callback for all partitions of a topic + * It invokes the delete partition callback for all partitions of a topic. + * The updateMetadataRequest is also going to set the leader for the topics being deleted to + * {@link LeaderAndIsr#LeaderDuringDelete}. This lets each broker know that this topic is being deleted and can be + * removed from their caches. 
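// Illustrative sketch (not from the patch): how a broker-side metadata cache could react to
// the "leader during delete" sentinel mentioned above and drop the affected partitions.
// The sentinel value (-2) and the flat Map cache are assumptions made for this example.
object MetadataCacheSketch {
  val LeaderDuringDelete = -2  // assumed sentinel meaning "this partition's topic is being deleted"

  def applyUpdate(cache: Map[String, Int], update: Map[String, Int]): Map[String, Int] =
    update.foldLeft(cache) { case (acc, (partition, leader)) =>
      if (leader == LeaderDuringDelete) acc - partition   // purge: the topic is going away
      else acc + (partition -> leader)                    // normal leader update
    }

  def main(args: Array[String]): Unit = {
    val cache = Map("t1-0" -> 1, "t2-0" -> 2)
    println(applyUpdate(cache, Map("t2-0" -> LeaderDuringDelete)))  // Map(t1-0 -> 1)
  }
}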
*/ private def onTopicDeletion(topics: Set[String]) { info("Topic deletion callback for %s".format(topics.mkString(","))) + // send update metadata so that brokers stop serving data for topics to be deleted + val partitions = topics.flatMap(controllerContext.partitionsForTopic) + controller.sendUpdateMetadataRequest(controllerContext.liveOrShuttingDownBrokerIds.toSeq, partitions) val partitionReplicaAssignmentByTopic = controllerContext.partitionReplicaAssignment.groupBy(p => p._1.topic) topics.foreach { topic => onPartitionDeletion(partitionReplicaAssignmentByTopic(topic).map(_._1).toSet) @@ -257,42 +313,41 @@ class TopicDeletionManager(controller: KafkaController, * the topics are added to the in progress list. As long as a topic is in the in progress list, deletion for that topic * is never retried. A topic is removed from the in progress list when * 1. Either the topic is successfully deleted OR - * 2. No replica for the topic is in ReplicaDeletionStarted state and at least one replica is in ReplicaDeletionFailed state + * 2. No replica for the topic is in ReplicaDeletionStarted state and at least one replica is in ReplicaDeletionIneligible state * If the topic is queued for deletion but deletion is not currently under progress, then deletion is retried for that topic * As part of starting deletion, all replicas are moved to the ReplicaDeletionStarted state where the controller sends * the replicas a StopReplicaRequest (delete=true) * This callback does the following things - - * 1. Send metadata request to all brokers excluding the topics to be deleted - * 2. Move all dead replicas directly to ReplicaDeletionFailed state. Also halt the deletion of respective topics if - * some replicas are dead since it won't complete successfully anyway - * 3. Move all alive replicas to ReplicaDeletionStarted state so they can be deleted successfully + * 1. Move all dead replicas directly to ReplicaDeletionIneligible state. Also mark the respective topics ineligible + * for deletion if some replicas are dead since it won't complete successfully anyway + * 2. 
Move all alive replicas to ReplicaDeletionStarted state so they can be deleted successfully *@param replicasForTopicsToBeDeleted */ private def startReplicaDeletion(replicasForTopicsToBeDeleted: Set[PartitionAndReplica]) { replicasForTopicsToBeDeleted.groupBy(_.topic).foreach { case(topic, replicas) => - // send update metadata so that brokers stop serving data - controller.sendUpdateMetadataRequest(controllerContext.liveOrShuttingDownBrokerIds.toSeq) var aliveReplicasForTopic = controllerContext.allLiveReplicas().filter(p => p.topic.equals(topic)) val deadReplicasForTopic = replicasForTopicsToBeDeleted -- aliveReplicasForTopic val successfullyDeletedReplicas = controller.replicaStateMachine.replicasInState(topic, ReplicaDeletionSuccessful) val replicasForDeletionRetry = aliveReplicasForTopic -- successfullyDeletedReplicas // move dead replicas directly to failed state - replicaStateMachine.handleStateChanges(deadReplicasForTopic, ReplicaDeletionFailed) + replicaStateMachine.handleStateChanges(deadReplicasForTopic, ReplicaDeletionIneligible) // send stop replica to all followers that are not in the OfflineReplica state so they stop sending fetch requests to the leader replicaStateMachine.handleStateChanges(replicasForDeletionRetry, OfflineReplica) debug("Deletion started for replicas %s".format(replicasForDeletionRetry.mkString(","))) controller.replicaStateMachine.handleStateChanges(replicasForDeletionRetry, ReplicaDeletionStarted, new Callbacks.CallbackBuilder().stopReplicaCallback(deleteTopicStopReplicaCallback).build) - if(deadReplicasForTopic.size > 0) - haltTopicDeletion(Set(topic)) + if(deadReplicasForTopic.size > 0) { + debug("Dead Replicas (%s) found for topic %s".format(deadReplicasForTopic.mkString(","), topic)) + markTopicIneligibleForDeletion(Set(topic)) + } } } /** * This callback is invoked by the delete topic callback with the list of partitions for topics to be deleted * It does the following - - * 1. Send UpdateMetadataRequest to all live brokers (that are not shutting down) with all partitions except those for - * which the topics are being deleted. The brokers start rejecting all client requests with UnknownTopicOrPartitionException + * 1. Send UpdateMetadataRequest to all live brokers (that are not shutting down) for partitions that are being + * deleted. The brokers start rejecting all client requests with UnknownTopicOrPartitionException * 2. Move all replicas for the partitions to OfflineReplica state. This will send StopReplicaRequest to the replicas * and LeaderAndIsrRequest to the leader with the shrunk ISR. 
When the leader replica itself is moved to OfflineReplica state, * it will skip sending the LeaderAndIsrRequest since the leader will be updated to -1 @@ -314,7 +369,7 @@ class TopicDeletionManager(controller: KafkaController, stopReplicaResponse.responseMap.filter(p => p._2 != ErrorMapping.NoError).map(_._1).toSet val replicasInError = partitionsInError.map(p => PartitionAndReplica(p.topic, p.partition, replicaId)) inLock(controllerContext.controllerLock) { - // move all the failed replicas to ReplicaDeletionFailed + // move all the failed replicas to ReplicaDeletionIneligible failReplicaDeletion(replicasInError) if(replicasInError.size != stopReplicaResponse.responseMap.size) { // some replicas could have been successfully deleted @@ -324,16 +379,22 @@ class TopicDeletionManager(controller: KafkaController, } } - class DeleteTopicsThread() extends ShutdownableThread("delete-topics-thread") { + class DeleteTopicsThread() extends ShutdownableThread(name = "delete-topics-thread-" + controller.config.brokerId, isInterruptible = false) { val zkClient = controllerContext.zkClient override def doWork() { + awaitTopicDeletionNotification() + + if (!isRunning.get) + return + inLock(controllerContext.controllerLock) { - awaitTopicDeletionNotification() val topicsQueuedForDeletion = Set.empty[String] ++ topicsToBeDeleted - if(topicsQueuedForDeletion.size > 0) + + if(!topicsQueuedForDeletion.isEmpty) info("Handling deletion for topics " + topicsQueuedForDeletion.mkString(",")) + topicsQueuedForDeletion.foreach { topic => - // if all replicas are marked as deleted successfully, then topic deletion is done + // if all replicas are marked as deleted successfully, then topic deletion is done if(controller.replicaStateMachine.areAllReplicasForTopicDeleted(topic)) { // clear up all state for this topic from controller cache and zookeeper completeDeleteTopic(topic) @@ -348,13 +409,12 @@ class TopicDeletionManager(controller: KafkaController, partitions.mkString(","), topic)) } else { // if you come here, then no replica is in TopicDeletionStarted and all replicas are not in - // TopicDeletionSuccessful. That means, there is at least one failed replica, which means topic deletion - // should be retried - val replicasInTopicDeletionFailedState = controller.replicaStateMachine.replicasInState(topic, ReplicaDeletionFailed) - // mark topic for deletion retry - markTopicForDeletionRetry(topic) - info("Retrying delete topic for topic %s since replicas %s were not successfully deleted" - .format(topic, replicasInTopicDeletionFailedState.mkString(","))) + // TopicDeletionSuccessful. That means, that either given topic haven't initiated deletion + // or there is at least one failed replica (which means topic deletion should be retried). + if(controller.replicaStateMachine.isAnyReplicaInState(topic, ReplicaDeletionIneligible)) { + // mark topic for deletion retry + markTopicForDeletionRetry(topic) + } } } // Try delete topic if it is eligible for deletion. 
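// Illustrative sketch (not from the patch): the per-topic decision the delete-topics thread
// makes from replica states, as walked through in the doWork logic above. The state names
// mirror the ReplicaState objects in this patch; the Decision type is a simplification.
object DeletionProgressSketch {
  sealed trait ReplicaState
  case object ReplicaDeletionStarted    extends ReplicaState
  case object ReplicaDeletionSuccessful extends ReplicaState
  case object ReplicaDeletionIneligible extends ReplicaState
  case object OfflineReplica            extends ReplicaState

  sealed trait Decision
  case object Complete extends Decision  // all replicas deleted: clear controller/ZK state
  case object Wait     extends Decision  // StopReplica responses still outstanding
  case object Retry    extends Decision  // nothing in flight and at least one ineligible replica

  def decide(states: Seq[ReplicaState]): Decision =
    if (states.nonEmpty && states.forall(_ == ReplicaDeletionSuccessful)) Complete
    else if (states.contains(ReplicaDeletionStarted)) Wait
    else if (states.contains(ReplicaDeletionIneligible)) Retry
    else Wait

  def main(args: Array[String]): Unit = {
    println(decide(Seq(ReplicaDeletionSuccessful, ReplicaDeletionSuccessful)))  // Complete
    println(decide(Seq(ReplicaDeletionSuccessful, ReplicaDeletionIneligible)))  // Retry
    println(decide(Seq(ReplicaDeletionStarted, ReplicaDeletionIneligible)))     // Wait
  }
}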
@@ -362,12 +422,11 @@ class TopicDeletionManager(controller: KafkaController, info("Deletion of topic %s (re)started".format(topic)) // topic deletion will be kicked off onTopicDeletion(Set(topic)) - } else if(isTopicDeletionHalted(topic)) { - info("Not retrying deletion of topic %s at this time since it is halted".format(topic)) + } else if(isTopicIneligibleForDeletion(topic)) { + info("Not retrying deletion of topic %s at this time since it is marked ineligible for deletion".format(topic)) } } } } } } - diff --git a/core/src/main/scala/kafka/javaapi/ConsumerMetadataResponse.scala b/core/src/main/scala/kafka/javaapi/ConsumerMetadataResponse.scala new file mode 100644 index 0000000000000..1b28861cdf7df --- /dev/null +++ b/core/src/main/scala/kafka/javaapi/ConsumerMetadataResponse.scala @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.javaapi + +import kafka.cluster.Broker + +class ConsumerMetadataResponse(private val underlying: kafka.api.ConsumerMetadataResponse) { + + def errorCode = underlying.errorCode + + def coordinator: Broker = { + import kafka.javaapi.Implicits._ + underlying.coordinatorOpt + } + + override def equals(other: Any) = canEqual(other) && { + val otherConsumerMetadataResponse = other.asInstanceOf[kafka.javaapi.ConsumerMetadataResponse] + this.underlying.equals(otherConsumerMetadataResponse.underlying) + } + + def canEqual(other: Any) = other.isInstanceOf[kafka.javaapi.ConsumerMetadataResponse] + + override def hashCode = underlying.hashCode + + override def toString = underlying.toString + +} diff --git a/core/src/main/scala/kafka/javaapi/OffsetCommitRequest.scala b/core/src/main/scala/kafka/javaapi/OffsetCommitRequest.scala index 57b9d2a9c1974..27fc1eb9d578e 100644 --- a/core/src/main/scala/kafka/javaapi/OffsetCommitRequest.scala +++ b/core/src/main/scala/kafka/javaapi/OffsetCommitRequest.scala @@ -17,24 +17,21 @@ package kafka.javaapi -import kafka.common.{TopicAndPartition, OffsetMetadataAndError} -import collection.JavaConversions -import java.nio.ByteBuffer +import kafka.common.{OffsetAndMetadata, TopicAndPartition} class OffsetCommitRequest(groupId: String, - requestInfo: java.util.Map[TopicAndPartition, OffsetMetadataAndError], - versionId: Short, + requestInfo: java.util.Map[TopicAndPartition, OffsetAndMetadata], correlationId: Int, clientId: String) { val underlying = { - val scalaMap: Map[TopicAndPartition, OffsetMetadataAndError] = { - import JavaConversions._ + val scalaMap: collection.immutable.Map[TopicAndPartition, OffsetAndMetadata] = { + import collection.JavaConversions._ + requestInfo.toMap } kafka.api.OffsetCommitRequest( groupId = groupId, requestInfo = scalaMap, - versionId = versionId, correlationId = correlationId, clientId = clientId ) diff --git 
a/core/src/main/scala/kafka/javaapi/OffsetCommitResponse.scala b/core/src/main/scala/kafka/javaapi/OffsetCommitResponse.scala index 570bf3173f485..c2d3d114b82a8 100644 --- a/core/src/main/scala/kafka/javaapi/OffsetCommitResponse.scala +++ b/core/src/main/scala/kafka/javaapi/OffsetCommitResponse.scala @@ -24,7 +24,7 @@ class OffsetCommitResponse(private val underlying: kafka.api.OffsetCommitRespons def errors: java.util.Map[TopicAndPartition, Short] = { import JavaConversions._ - underlying.requestInfo + underlying.commitStatus } } diff --git a/core/src/main/scala/kafka/javaapi/TopicMetadata.scala b/core/src/main/scala/kafka/javaapi/TopicMetadata.scala index d08c3f4af51e7..f384e04678df1 100644 --- a/core/src/main/scala/kafka/javaapi/TopicMetadata.scala +++ b/core/src/main/scala/kafka/javaapi/TopicMetadata.scala @@ -44,6 +44,8 @@ class TopicMetadata(private val underlying: kafka.api.TopicMetadata) { def errorCode: Short = underlying.errorCode def sizeInBytes: Int = underlying.sizeInBytes + + override def toString = underlying.toString } @@ -68,5 +70,6 @@ class PartitionMetadata(private val underlying: kafka.api.PartitionMetadata) { def errorCode: Short = underlying.errorCode def sizeInBytes: Int = underlying.sizeInBytes -} + override def toString = underlying.toString +} diff --git a/core/src/main/scala/kafka/javaapi/TopicMetadataRequest.scala b/core/src/main/scala/kafka/javaapi/TopicMetadataRequest.scala index 7e6da164a26b1..b0b7be14d494a 100644 --- a/core/src/main/scala/kafka/javaapi/TopicMetadataRequest.scala +++ b/core/src/main/scala/kafka/javaapi/TopicMetadataRequest.scala @@ -24,10 +24,10 @@ import kafka.common.ErrorMapping import kafka.network.RequestChannel.Response class TopicMetadataRequest(val versionId: Short, - override val correlationId: Int, + val correlationId: Int, val clientId: String, val topics: java.util.List[String]) - extends RequestOrResponse(Some(kafka.api.RequestKeys.MetadataKey), correlationId) { + extends RequestOrResponse(Some(kafka.api.RequestKeys.MetadataKey)) { val underlying: kafka.api.TopicMetadataRequest = { import scala.collection.JavaConversions._ diff --git a/core/src/main/scala/kafka/javaapi/TopicMetadataResponse.scala b/core/src/main/scala/kafka/javaapi/TopicMetadataResponse.scala index 252a0c9d6c12b..335906055a694 100644 --- a/core/src/main/scala/kafka/javaapi/TopicMetadataResponse.scala +++ b/core/src/main/scala/kafka/javaapi/TopicMetadataResponse.scala @@ -23,4 +23,15 @@ class TopicMetadataResponse(private val underlying: kafka.api.TopicMetadataRespo import kafka.javaapi.MetadataListImplicits._ underlying.topicsMetadata } + + override def equals(other: Any) = canEqual(other) && { + val otherTopicMetadataResponse = other.asInstanceOf[kafka.javaapi.TopicMetadataResponse] + this.underlying.equals(otherTopicMetadataResponse.underlying) + } + + def canEqual(other: Any) = other.isInstanceOf[kafka.javaapi.TopicMetadataResponse] + + override def hashCode = underlying.hashCode + + override def toString = underlying.toString } diff --git a/core/src/main/scala/kafka/javaapi/consumer/ConsumerConnector.java b/core/src/main/scala/kafka/javaapi/consumer/ConsumerConnector.java index c45c80399c21f..cc3400ff81fc0 100644 --- a/core/src/main/scala/kafka/javaapi/consumer/ConsumerConnector.java +++ b/core/src/main/scala/kafka/javaapi/consumer/ConsumerConnector.java @@ -18,19 +18,19 @@ package kafka.javaapi.consumer; +import java.util.List; +import java.util.Map; import kafka.consumer.KafkaStream; import kafka.consumer.TopicFilter; import kafka.serializer.Decoder; -import 
java.util.List; -import java.util.Map; - public interface ConsumerConnector { /** * Create a list of MessageStreams of type T for each topic. * * @param topicCountMap a map of (topic, #streams) pair - * @param decoder a decoder that converts from Message to T + * @param keyDecoder a decoder that decodes the message key + * @param valueDecoder a decoder that decodes the message itself * @return a map of (topic, list of KafkaStream) pairs. * The number of items in the list is #streams. Each stream supports * an iterator over message/metadata pairs. @@ -62,6 +62,7 @@ public interface ConsumerConnector { * Commit the offsets of all broker partitions connected by this connector. */ public void commitOffsets(); + public void commitOffsets(boolean retryOnFailure); /** * Shut down the connector diff --git a/core/src/main/scala/kafka/javaapi/consumer/ConsumerRebalanceListener.java b/core/src/main/scala/kafka/javaapi/consumer/ConsumerRebalanceListener.java new file mode 100644 index 0000000000000..facf509841918 --- /dev/null +++ b/core/src/main/scala/kafka/javaapi/consumer/ConsumerRebalanceListener.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.javaapi.consumer; + +import kafka.common.TopicAndPartition; +import kafka.consumer.ConsumerThreadId; + +import java.util.Map; +import java.util.Set; + +/** + * This listener is used for execution of tasks defined by user when a consumer rebalance + * occurs in {@link kafka.consumer.ZookeeperConsumerConnector} + */ +public interface ConsumerRebalanceListener { + + /** + * This method is called after all the fetcher threads are stopped but before the + * ownership of partitions are released. Depending on whether auto offset commit is + * enabled or not, offsets may or may not have been committed. + * This listener is initially added to prevent duplicate messages on consumer rebalance + * in mirror maker, where offset auto commit is disabled to prevent data loss. It could + * also be used in more general cases. 
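// Illustrative sketch (not from the patch): the intent of the rebalance hook described
// above -- commit whatever has been consumed after the fetchers stop but before partition
// ownership is released, so the next owner does not re-read the same messages.
// The trait and its argument type are simplified stand-ins for the javaapi interface.
object RebalanceHookSketch {
  trait RebalanceListener {
    def beforeReleasingPartitions(owned: Map[String, Set[Int]]): Unit
  }

  // Mirror-maker-style usage: auto commit is off, so commit explicitly in the hook.
  class CommitBeforeRelease(commitOffsets: () => Unit) extends RebalanceListener {
    def beforeReleasingPartitions(owned: Map[String, Set[Int]]): Unit = {
      commitOffsets()
      println(s"committed before releasing ${owned.values.map(_.size).sum} partitions")
    }
  }

  def main(args: Array[String]): Unit = {
    val hook = new CommitBeforeRelease(() => ())  // no-op commit for the example
    hook.beforeReleasingPartitions(Map("events" -> Set(0, 1, 2)))
  }
}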
+ */ + public void beforeReleasingPartitions(Map> partitionOwnership); + +} diff --git a/core/src/main/scala/kafka/javaapi/consumer/ZookeeperConsumerConnector.scala b/core/src/main/scala/kafka/javaapi/consumer/ZookeeperConsumerConnector.scala index 58e83f6119d32..9baad34a9793e 100644 --- a/core/src/main/scala/kafka/javaapi/consumer/ZookeeperConsumerConnector.scala +++ b/core/src/main/scala/kafka/javaapi/consumer/ZookeeperConsumerConnector.scala @@ -18,9 +18,10 @@ package kafka.javaapi.consumer import kafka.serializer._ import kafka.consumer._ +import kafka.common.MessageStreamsExistException import scala.collection.mutable import scala.collection.JavaConversions - +import java.util.concurrent.atomic.AtomicBoolean /** * This class handles the consumers interaction with zookeeper @@ -63,6 +64,7 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, extends ConsumerConnector { private val underlying = new kafka.consumer.ZookeeperConsumerConnector(config, enableFetcher) + private val messageStreamCreated = new AtomicBoolean(false) def this(config: ConsumerConfig) = this(config, true) @@ -73,6 +75,9 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, valueDecoder: Decoder[V]) : java.util.Map[String,java.util.List[KafkaStream[K,V]]] = { + if (messageStreamCreated.getAndSet(true)) + throw new MessageStreamsExistException(this.getClass.getSimpleName + + " can create message streams at most once",null) val scalaTopicCountMap: Map[String, Int] = { import JavaConversions._ Map.empty[String, Int] ++ (topicCountMap.asInstanceOf[java.util.Map[String, Int]]: mutable.Map[String, Int]) @@ -87,23 +92,31 @@ private[kafka] class ZookeeperConsumerConnector(val config: ConsumerConfig, } ret } - + def createMessageStreams(topicCountMap: java.util.Map[String,java.lang.Integer]): java.util.Map[String,java.util.List[KafkaStream[Array[Byte],Array[Byte]]]] = createMessageStreams(topicCountMap, new DefaultDecoder(), new DefaultDecoder()) - + def createMessageStreamsByFilter[K,V](topicFilter: TopicFilter, numStreams: Int, keyDecoder: Decoder[K], valueDecoder: Decoder[V]) = { import JavaConversions._ underlying.createMessageStreamsByFilter(topicFilter, numStreams, keyDecoder, valueDecoder) } - def createMessageStreamsByFilter(topicFilter: TopicFilter, numStreams: Int) = + def createMessageStreamsByFilter(topicFilter: TopicFilter, numStreams: Int) = createMessageStreamsByFilter(topicFilter, numStreams, new DefaultDecoder(), new DefaultDecoder()) - - def createMessageStreamsByFilter(topicFilter: TopicFilter) = + + def createMessageStreamsByFilter(topicFilter: TopicFilter) = createMessageStreamsByFilter(topicFilter, 1, new DefaultDecoder(), new DefaultDecoder()) - + def commitOffsets() { - underlying.commitOffsets + underlying.commitOffsets(true) + } + + def commitOffsets(retryOnFailure: Boolean) { + underlying.commitOffsets(retryOnFailure) + } + + def setConsumerRebalanceListener(consumerRebalanceListener: ConsumerRebalanceListener) { + underlying.setConsumerRebalanceListener(consumerRebalanceListener) } def shutdown() { diff --git a/core/src/main/scala/kafka/javaapi/message/ByteBufferMessageSet.scala b/core/src/main/scala/kafka/javaapi/message/ByteBufferMessageSet.scala index fecee8d5f7b32..0125565c84aea 100644 --- a/core/src/main/scala/kafka/javaapi/message/ByteBufferMessageSet.scala +++ b/core/src/main/scala/kafka/javaapi/message/ByteBufferMessageSet.scala @@ -17,12 +17,11 @@ package kafka.javaapi.message import java.util.concurrent.atomic.AtomicLong -import 
scala.reflect.BeanProperty import java.nio.ByteBuffer import kafka.message._ import kafka.javaapi.Implicits.javaListToScalaBuffer -class ByteBufferMessageSet(@BeanProperty val buffer: ByteBuffer) extends MessageSet { +class ByteBufferMessageSet(val buffer: ByteBuffer) extends MessageSet { private val underlying: kafka.message.ByteBufferMessageSet = new kafka.message.ByteBufferMessageSet(buffer) def this(compressionCodec: CompressionCodec, messages: java.util.List[Message]) { @@ -36,6 +35,8 @@ class ByteBufferMessageSet(@BeanProperty val buffer: ByteBuffer) extends Message def validBytes: Int = underlying.validBytes + def getBuffer = buffer + override def iterator: java.util.Iterator[MessageAndOffset] = new java.util.Iterator[MessageAndOffset] { val underlyingIterator = underlying.iterator override def hasNext(): Boolean = { diff --git a/core/src/main/scala/kafka/log/CleanerConfig.scala b/core/src/main/scala/kafka/log/CleanerConfig.scala index fa946ada92602..ade838672d671 100644 --- a/core/src/main/scala/kafka/log/CleanerConfig.scala +++ b/core/src/main/scala/kafka/log/CleanerConfig.scala @@ -35,7 +35,7 @@ case class CleanerConfig(val numThreads: Int = 1, val ioBufferSize: Int = 1024*1024, val maxMessageSize: Int = 32*1024*1024, val maxIoBytesPerSecond: Double = Double.MaxValue, - val backOffMs: Long = 60 * 1000, + val backOffMs: Long = 15 * 1000, val enableCleaner: Boolean = true, val hashAlgorithm: String = "MD5") { } \ No newline at end of file diff --git a/core/src/main/scala/kafka/log/FileMessageSet.scala b/core/src/main/scala/kafka/log/FileMessageSet.scala index e1f8b979c3e6f..b2652ddbe2f85 100644 --- a/core/src/main/scala/kafka/log/FileMessageSet.scala +++ b/core/src/main/scala/kafka/log/FileMessageSet.scala @@ -255,8 +255,8 @@ class FileMessageSet private[kafka](@volatile var file: File, /** * Read from the underlying file into the buffer starting at the given position */ - def readInto(buffer: ByteBuffer, position: Int): ByteBuffer = { - channel.read(buffer, position) + def readInto(buffer: ByteBuffer, relativePosition: Int): ByteBuffer = { + channel.read(buffer, relativePosition + this.start) buffer.flip() buffer } diff --git a/core/src/main/scala/kafka/log/Log.scala b/core/src/main/scala/kafka/log/Log.scala index b3ab5220a66a2..024506cd00556 100644 --- a/core/src/main/scala/kafka/log/Log.scala +++ b/core/src/main/scala/kafka/log/Log.scala @@ -17,17 +17,35 @@ package kafka.log -import java.io.{IOException, File} -import java.util.concurrent.{ConcurrentNavigableMap, ConcurrentSkipListMap} -import java.util.concurrent.atomic._ import kafka.utils._ -import scala.collection.JavaConversions -import java.text.NumberFormat import kafka.message._ import kafka.common._ import kafka.metrics.KafkaMetricsGroup +import kafka.server.{LogOffsetMetadata, FetchDataInfo, BrokerTopicStats} + +import java.io.{IOException, File} +import java.util.concurrent.{ConcurrentNavigableMap, ConcurrentSkipListMap} +import java.util.concurrent.atomic._ +import java.text.NumberFormat +import scala.collection.JavaConversions + import com.yammer.metrics.core.Gauge +object LogAppendInfo { + val UnknownLogAppendInfo = LogAppendInfo(-1, -1, NoCompressionCodec, -1, -1, false) +} + +/** + * Struct to hold various quantities we compute about each message set before appending to the log + * @param firstOffset The first offset in the message set + * @param lastOffset The last offset in the message set + * @param shallowCount The number of shallow messages + * @param validBytes The number of valid bytes + * @param codec The 
codec used in the message set + * @param offsetsMonotonic Are the offsets in this message set monotonically increasing + */ +case class LogAppendInfo(var firstOffset: Long, var lastOffset: Long, codec: CompressionCodec, shallowCount: Int, validBytes: Int, offsetsMonotonic: Boolean) + /** * An append-only log for storing messages. @@ -48,11 +66,11 @@ import com.yammer.metrics.core.Gauge class Log(val dir: File, @volatile var config: LogConfig, @volatile var recoveryPoint: Long = 0L, - val scheduler: Scheduler, + scheduler: Scheduler, time: Time = SystemTime) extends Logging with KafkaMetricsGroup { import kafka.log.Log._ - + /* A lock that guards all modifications to the log */ private val lock = new Object @@ -64,17 +82,37 @@ class Log(val dir: File, loadSegments() /* Calculate the offset of the next message */ - private val nextOffset: AtomicLong = new AtomicLong(activeSegment.nextOffset()) + @volatile var nextOffsetMetadata = new LogOffsetMetadata(activeSegment.nextOffset(), activeSegment.baseOffset, activeSegment.size.toInt) - val topicAndPartition: TopicAndPartition = Log.parseTopicPartitionName(name) + val topicAndPartition: TopicAndPartition = Log.parseTopicPartitionName(dir) info("Completed load of log %s with log end offset %d".format(name, logEndOffset)) - newGauge(name + "-" + "NumLogSegments", - new Gauge[Int] { def value = numberOfSegments }) + val tags = Map("topic" -> topicAndPartition.topic, "partition" -> topicAndPartition.partition.toString) + + newGauge("NumLogSegments", + new Gauge[Int] { + def value = numberOfSegments + }, + tags) - newGauge(name + "-" + "LogEndOffset", - new Gauge[Long] { def value = logEndOffset }) + newGauge("LogStartOffset", + new Gauge[Long] { + def value = logStartOffset + }, + tags) + + newGauge("LogEndOffset", + new Gauge[Long] { + def value = logEndOffset + }, + tags) + + newGauge("Size", + new Gauge[Long] { + def value = size + }, + tags) /** The name of this log */ def name = dir.getName() @@ -132,6 +170,7 @@ class Log(val dir: File, startOffset = start, indexIntervalBytes = config.indexInterval, maxIndexSize = config.maxIndexSize, + rollJitterMs = config.randomSegmentJitter, time = time) if(!hasIndex) { error("Could not find index file corresponding to log file %s, rebuilding index...".format(segment.log.file.getAbsolutePath)) @@ -143,10 +182,11 @@ class Log(val dir: File, if(logSegments.size == 0) { // no existing segments, create a new mutable segment beginning at offset 0 - segments.put(0, new LogSegment(dir = dir, + segments.put(0L, new LogSegment(dir = dir, startOffset = 0, indexIntervalBytes = config.indexInterval, maxIndexSize = config.maxIndexSize, + rollJitterMs = config.randomSegmentJitter, time = time)) } else { recoverLog() @@ -158,6 +198,10 @@ class Log(val dir: File, for (s <- logSegments) s.index.sanityCheck() } + + private def updateLogEndOffset(messageOffset: Long) { + nextOffsetMetadata = new LogOffsetMetadata(messageOffset, activeSegment.baseOffset, activeSegment.size.toInt) + } private def recoverLog() { // if we have the clean shutdown marker, skip recovery @@ -232,19 +276,16 @@ class Log(val dir: File, return appendInfo // trim any invalid bytes or partial messages before appending it to the on-disk log - var validMessages = trimInvalidBytes(messages) + var validMessages = trimInvalidBytes(messages, appendInfo) try { // they are valid, insert them in the log lock synchronized { - appendInfo.firstOffset = nextOffset.get - - // maybe roll the log if this segment is full - val segment = maybeRoll() + appendInfo.firstOffset 
= nextOffsetMetadata.messageOffset if(assignOffsets) { - // assign offsets to the messageset - val offset = new AtomicLong(nextOffset.get) + // assign offsets to the message set + val offset = new AtomicLong(nextOffsetMetadata.messageOffset) try { validMessages = validMessages.assignOffsets(offset, appendInfo.codec) } catch { @@ -253,26 +294,40 @@ class Log(val dir: File, appendInfo.lastOffset = offset.get - 1 } else { // we are taking the offsets we are given - if(!appendInfo.offsetsMonotonic || appendInfo.firstOffset < nextOffset.get) + if(!appendInfo.offsetsMonotonic || appendInfo.firstOffset < nextOffsetMetadata.messageOffset) throw new IllegalArgumentException("Out of order offsets found in " + messages) } - // Check if the message sizes are valid. This check is done after assigning offsets to ensure the comparison - // happens with the new message size (after re-compression, if any) + // re-validate message sizes since after re-compression some may exceed the limit for(messageAndOffset <- validMessages.shallowIterator) { - if(MessageSet.entrySize(messageAndOffset.message) > config.maxMessageSize) + if(MessageSet.entrySize(messageAndOffset.message) > config.maxMessageSize) { + // we record the original message set size instead of trimmed size + // to be consistent with pre-compression bytesRejectedRate recording + BrokerTopicStats.getBrokerTopicStats(topicAndPartition.topic).bytesRejectedRate.mark(messages.sizeInBytes) + BrokerTopicStats.getBrokerAllTopicsStats.bytesRejectedRate.mark(messages.sizeInBytes) throw new MessageSizeTooLargeException("Message size is %d bytes which exceeds the maximum configured message size of %d." .format(MessageSet.entrySize(messageAndOffset.message), config.maxMessageSize)) + } } + // check messages set size may be exceed config.segmentSize + if(validMessages.sizeInBytes > config.segmentSize) { + throw new MessageSetSizeTooLargeException("Message set size is %d bytes which exceeds the maximum configured segment size of %d." + .format(validMessages.sizeInBytes, config.segmentSize)) + } + + + // maybe roll the log if this segment is full + val segment = maybeRoll(validMessages.sizeInBytes) + // now append to the log segment.append(appendInfo.firstOffset, validMessages) // increment the log end offset - nextOffset.set(appendInfo.lastOffset + 1) + updateLogEndOffset(appendInfo.lastOffset + 1) trace("Appended message set to log %s with first offset: %d, next offset: %d, and messages: %s" - .format(this.name, appendInfo.firstOffset, nextOffset.get(), validMessages)) + .format(this.name, appendInfo.firstOffset, nextOffsetMetadata.messageOffset, validMessages)) if(unflushedMessages >= config.flushInterval) flush() @@ -284,18 +339,11 @@ class Log(val dir: File, } } - /** Struct to hold various quantities we compute about each message set before appending to the log - * @param firstOffset The first offset in the message set - * @param lastOffset The last offset in the message set - * @param codec The codec used in the message set - * @param offsetsMonotonic Are the offsets in this message set monotonically increasing - */ - case class LogAppendInfo(var firstOffset: Long, var lastOffset: Long, codec: CompressionCodec, shallowCount: Int, offsetsMonotonic: Boolean) - /** * Validate the following: *
* 1. each message matches its CRC + * 2. each message size is valid * * Also compute the following quantities: @@ -303,12 +351,14 @@ class Log(val dir: File, * • First offset in the message set * • Last offset in the message set * • Number of messages + * • Number of valid bytes * • Whether the offsets are monotonically increasing *
  • Whether any compression codec is used (if many are used, then the last one is given) * */ private def analyzeAndValidateMessageSet(messages: ByteBufferMessageSet): LogAppendInfo = { - var messageCount = 0 + var shallowMessageCount = 0 + var validBytesCount = 0 var firstOffset, lastOffset = -1L var codec: CompressionCodec = NoCompressionCodec var monotonic = true @@ -322,25 +372,38 @@ class Log(val dir: File, // update the last offset seen lastOffset = messageAndOffset.offset - // check the validity of the message by checking CRC val m = messageAndOffset.message + + // Check if the message sizes are valid. + val messageSize = MessageSet.entrySize(m) + if(messageSize > config.maxMessageSize) { + BrokerTopicStats.getBrokerTopicStats(topicAndPartition.topic).bytesRejectedRate.mark(messages.sizeInBytes) + BrokerTopicStats.getBrokerAllTopicsStats.bytesRejectedRate.mark(messages.sizeInBytes) + throw new MessageSizeTooLargeException("Message size is %d bytes which exceeds the maximum configured message size of %d." + .format(messageSize, config.maxMessageSize)) + } + + // check the validity of the message by checking CRC m.ensureValid() - messageCount += 1; + + shallowMessageCount += 1 + validBytesCount += messageSize val messageCodec = m.compressionCodec if(messageCodec != NoCompressionCodec) codec = messageCodec } - LogAppendInfo(firstOffset, lastOffset, codec, messageCount, monotonic) + LogAppendInfo(firstOffset, lastOffset, codec, shallowMessageCount, validBytesCount, monotonic) } /** * Trim any invalid bytes from the end of this message set (if there are any) * @param messages The message set to trim + * @param info The general information of the message set * @return A trimmed message set. This may be the same as what was passed in or it may not. */ - private def trimInvalidBytes(messages: ByteBufferMessageSet): ByteBufferMessageSet = { - val messageSetValidBytes = messages.validBytes + private def trimInvalidBytes(messages: ByteBufferMessageSet, info: LogAppendInfo): ByteBufferMessageSet = { + val messageSetValidBytes = info.validBytes if(messageSetValidBytes < 0) throw new InvalidMessageSizeException("Illegal length of message set " + messageSetValidBytes + " Message set cannot be appended to log. Possible causes are corrupted produce requests") if(messageSetValidBytes == messages.sizeInBytes) { @@ -355,20 +418,21 @@ class Log(val dir: File, /** * Read messages from the log + * * @param startOffset The offset to begin reading at * @param maxLength The maximum number of bytes to read * @param maxOffset -The offset to read up to, exclusive. (i.e. the first offset NOT included in the resulting message set). * * @throws OffsetOutOfRangeException If startOffset is beyond the log end offset or before the base offset of the first segment. 
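// Illustrative sketch (not from the patch): the bookkeeping analyzeAndValidateMessageSet
// performs above (first/last offset, shallow count, valid bytes, monotonicity), applied to
// a plain sequence of (offset, sizeInBytes) pairs instead of a real ByteBufferMessageSet.
object AnalyzeSketch {
  case class Info(firstOffset: Long, lastOffset: Long, shallowCount: Int,
                  validBytes: Int, offsetsMonotonic: Boolean)

  def analyze(entries: Seq[(Long, Int)], maxMessageSize: Int): Info = {
    var first, last = -1L
    var count       = 0
    var bytes       = 0
    var monotonic   = true
    for ((offset, size) <- entries) {
      if (first < 0) first = offset
      if (last >= offset) monotonic = false
      last = offset
      require(size <= maxMessageSize,
        s"message of $size bytes exceeds the configured maximum of $maxMessageSize")
      count += 1
      bytes += size
    }
    Info(first, last, count, bytes, monotonic)
  }

  def main(args: Array[String]): Unit =
    println(analyze(Seq((0L, 100), (1L, 120), (2L, 90)), maxMessageSize = 1000))
    // prints Info(0,2,3,310,true)
}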
- * @return The messages read + * @return The fetch data information including fetch starting offset metadata and messages read */ - def read(startOffset: Long, maxLength: Int, maxOffset: Option[Long] = None): MessageSet = { + def read(startOffset: Long, maxLength: Int, maxOffset: Option[Long] = None): FetchDataInfo = { trace("Reading %d bytes from offset %d in log %s of length %d bytes".format(maxLength, startOffset, name, size)) // check if the offset is valid and in range - val next = nextOffset.get + val next = nextOffsetMetadata.messageOffset if(startOffset == next) - return MessageSet.Empty + return FetchDataInfo(nextOffsetMetadata, MessageSet.Empty) var entry = segments.floorEntry(startOffset) @@ -380,15 +444,31 @@ class Log(val dir: File, // but if that segment doesn't contain any messages with an offset greater than that // continue to read from successive segments until we get some messages or we reach the end of the log while(entry != null) { - val messages = entry.getValue.read(startOffset, maxOffset, maxLength) - if(messages == null) + val fetchInfo = entry.getValue.read(startOffset, maxOffset, maxLength) + if(fetchInfo == null) { entry = segments.higherEntry(entry.getKey) - else - return messages + } else { + return fetchInfo + } } - // okay we are beyond the end of the last segment but less than the log end offset - MessageSet.Empty + // okay we are beyond the end of the last segment with no data fetched although the start offset is in range, + // this can happen when all messages with offset larger than start offsets have been deleted. + // In this case, we will return the empty set with log end offset metadata + FetchDataInfo(nextOffsetMetadata, MessageSet.Empty) + } + + /** + * Given a message offset, find its corresponding offset metadata in the log. + * If the message offset is out of range, return unknown offset metadata + */ + def convertToOffsetMetadata(offset: Long): LogOffsetMetadata = { + try { + val fetchDataInfo = read(offset, 1) + fetchDataInfo.fetchOffset + } catch { + case e: OffsetOutOfRangeException => LogOffsetMetadata.UnknownOffsetMetadata + } } /** @@ -401,7 +481,7 @@ class Log(val dir: File, // find any segments that match the user-supplied predicate UNLESS it is the final segment // and it is empty (since we would just end up re-creating it val lastSegment = activeSegment - var deletable = logSegments.takeWhile(s => predicate(s) && (s.baseOffset != lastSegment.baseOffset || s.size > 0)) + val deletable = logSegments.takeWhile(s => predicate(s) && (s.baseOffset != lastSegment.baseOffset || s.size > 0)) val numToDelete = deletable.size if(numToDelete > 0) { lock synchronized { @@ -420,19 +500,37 @@ class Log(val dir: File, */ def size: Long = logSegments.map(_.size).sum + /** + * The earliest message offset in the log + */ + def logStartOffset: Long = logSegments.head.baseOffset + + /** + * The offset metadata of the next message that will be appended to the log + */ + def logEndOffsetMetadata: LogOffsetMetadata = nextOffsetMetadata + /** * The offset of the next message that will be appended to the log */ - def logEndOffset: Long = nextOffset.get + def logEndOffset: Long = nextOffsetMetadata.messageOffset /** - * Roll the log over to a new empty log segment if necessary + * Roll the log over to a new empty log segment if necessary. + * + * @param messagesSize The messages set size in bytes + * logSegment will be rolled if one of the following conditions met + *
+ * 1. The logSegment is full + * 2. The maxTime has elapsed + * 3. The index is full + *
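// Illustrative sketch (not from the patch): the roll test listed above as a pure function.
// Parameter names are hypothetical; the size check subtracts the incoming message-set size
// and the age check subtracts the per-segment jitter, as in the maybeRoll change below.
object RollDecisionSketch {
  def shouldRoll(segmentBytes: Int, segmentSizeLimit: Int, incomingBytes: Int,
                 segmentAgeMs: Long, segmentMsLimit: Long, rollJitterMs: Long,
                 indexFull: Boolean): Boolean =
    segmentBytes > segmentSizeLimit - incomingBytes ||
      (segmentBytes > 0 && segmentAgeMs > segmentMsLimit - rollJitterMs) ||
      indexFull

  def main(args: Array[String]): Unit =
    // true: appending 2000 more bytes would overflow the 1 MB segment
    println(shouldRoll(segmentBytes = 999000, segmentSizeLimit = 1000000, incomingBytes = 2000,
                       segmentAgeMs = 0L, segmentMsLimit = 604800000L, rollJitterMs = 0L,
                       indexFull = false))
}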
    * @return The currently active segment after (perhaps) rolling to a new segment */ - private def maybeRoll(): LogSegment = { + private def maybeRoll(messagesSize: Int): LogSegment = { val segment = activeSegment - if (segment.size > config.segmentSize || - segment.size > 0 && time.milliseconds - segment.created > config.segmentMs || + if (segment.size > config.segmentSize - messagesSize || + segment.size > 0 && time.milliseconds - segment.created > config.segmentMs - segment.rollJitterMs || segment.index.isFull) { debug("Rolling new log segment in %s (log_size = %d/%d, index_size = %d/%d, age_ms = %d/%d)." .format(name, @@ -441,7 +539,7 @@ class Log(val dir: File, segment.index.entries, segment.index.maxEntries, time.milliseconds - segment.created, - config.segmentMs)) + config.segmentMs - segment.rollJitterMs)) roll() } else { segment @@ -472,6 +570,7 @@ class Log(val dir: File, startOffset = newOffset, indexIntervalBytes = config.indexInterval, maxIndexSize = config.maxIndexSize, + rollJitterMs = config.randomSegmentJitter, time = time) val prev = addSegment(segment) if(prev != null) @@ -545,7 +644,7 @@ class Log(val dir: File, val deletable = logSegments.filter(segment => segment.baseOffset > targetOffset) deletable.foreach(deleteSegment(_)) activeSegment.truncateTo(targetOffset) - this.nextOffset.set(targetOffset) + updateLogEndOffset(targetOffset) this.recoveryPoint = math.min(targetOffset, this.recoveryPoint) } } @@ -564,8 +663,9 @@ class Log(val dir: File, newOffset, indexIntervalBytes = config.indexInterval, maxIndexSize = config.maxIndexSize, + rollJitterMs = config.randomSegmentJitter, time = time)) - this.nextOffset.set(newOffset) + updateLogEndOffset(newOffset) this.recoveryPoint = math.min(newOffset, this.recoveryPoint) } } @@ -732,9 +832,25 @@ object Log { /** * Parse the topic and partition out of the directory name of a log */ - def parseTopicPartitionName(name: String): TopicAndPartition = { + def parseTopicPartitionName(dir: File): TopicAndPartition = { + val name: String = dir.getName + if (name == null || name.isEmpty || !name.contains('-')) { + throwException(dir) + } val index = name.lastIndexOf('-') - TopicAndPartition(name.substring(0,index), name.substring(index+1).toInt) + val topic: String = name.substring(0, index) + val partition: String = name.substring(index + 1) + if (topic.length < 1 || partition.length < 1) { + throwException(dir) + } + TopicAndPartition(topic, partition.toInt) + } + + def throwException(dir: File) { + throw new KafkaException("Found directory " + dir.getCanonicalPath + ", " + + "'" + dir.getName + "' is not in the form of topic-partition\n" + + "If a directory does not contain Kafka topic data it should not exist in Kafka's log " + + "directory") } } diff --git a/core/src/main/scala/kafka/log/LogCleaner.scala b/core/src/main/scala/kafka/log/LogCleaner.scala index 64046472a1f88..f8fcb843c80ee 100644 --- a/core/src/main/scala/kafka/log/LogCleaner.scala +++ b/core/src/main/scala/kafka/log/LogCleaner.scala @@ -17,15 +17,21 @@ package kafka.log +import kafka.common._ +import kafka.message._ +import kafka.utils._ +import kafka.metrics.KafkaMetricsGroup + import scala.collection._ import scala.math import java.nio._ import java.util.Date import java.io.File -import kafka.common._ -import kafka.message._ -import kafka.utils._ import java.lang.IllegalStateException +import java.util.concurrent.CountDownLatch +import java.util.concurrent.TimeUnit + +import com.yammer.metrics.core.Gauge /** * The cleaner is responsible for removing obsolete 
records from logs which have the dedupe retention strategy. @@ -63,7 +69,8 @@ import java.lang.IllegalStateException class LogCleaner(val config: CleanerConfig, val logDirs: Array[File], val logs: Pool[TopicAndPartition, Log], - time: Time = SystemTime) extends Logging { + time: Time = SystemTime) extends Logging with KafkaMetricsGroup { + /* for managing the state of partitions being cleaned. */ private val cleanerManager = new LogCleanerManager(logDirs, logs); @@ -71,11 +78,33 @@ class LogCleaner(val config: CleanerConfig, private val throttler = new Throttler(desiredRatePerSec = config.maxIoBytesPerSecond, checkIntervalMs = 300, throttleDown = true, + "cleaner-io", + "bytes", time = time) /* the threads */ private val cleaners = (0 until config.numThreads).map(new CleanerThread(_)) + /* a metric to track the maximum utilization of any thread's buffer in the last cleaning */ + newGauge("max-buffer-utilization-percent", + new Gauge[Int] { + def value: Int = cleaners.map(_.lastStats).map(100 * _.bufferUtilization).max.toInt + }) + /* a metric to track the recopy rate of each thread's last cleaning */ + newGauge("cleaner-recopy-percent", + new Gauge[Int] { + def value: Int = { + val stats = cleaners.map(_.lastStats) + val recopyRate = stats.map(_.bytesWritten).sum.toDouble / math.max(stats.map(_.bytesRead).sum, 1) + (100 * recopyRate).toInt + } + }) + /* a metric to track the maximum cleaning time for the last cleaning from each thread */ + newGauge("max-clean-time-secs", + new Gauge[Int] { + def value: Int = cleaners.map(_.lastStats).map(_.elapsedSecs).max.toInt + }) + /** * Start the background cleaning */ @@ -131,6 +160,9 @@ class LogCleaner(val config: CleanerConfig, */ private class CleanerThread(threadId: Int) extends ShutdownableThread(name = "kafka-log-cleaner-thread-" + threadId, isInterruptible = false) { + + override val loggerName = classOf[LogCleaner].getName + if(config.dedupeBufferSize / config.numThreads > Int.MaxValue) warn("Cannot use more than 2G of cleaner buffer space per cleaner thread, ignoring excess buffer space...") @@ -144,6 +176,9 @@ class LogCleaner(val config: CleanerConfig, time = time, checkDone = checkDone) + @volatile var lastStats: CleanerStats = new CleanerStats() + private val backOffWaitLatch = new CountDownLatch(1) + private def checkDone(topicAndPartition: TopicAndPartition) { if (!isRunning.get()) throw new ThreadShutdownException @@ -157,6 +192,13 @@ class LogCleaner(val config: CleanerConfig, cleanOrSleep() } + + override def shutdown() = { + initiateShutdown() + backOffWaitLatch.countDown() + awaitShutdown() + } + /** * Clean a log if there is a dirty log available, otherwise sleep for a bit */ @@ -164,13 +206,13 @@ class LogCleaner(val config: CleanerConfig, cleanerManager.grabFilthiestLog() match { case None => // there are no cleanable logs, sleep a while - time.sleep(config.backOffMs) + backOffWaitLatch.await(config.backOffMs, TimeUnit.MILLISECONDS) case Some(cleanable) => // there's a log, clean it var endOffset = cleanable.firstDirtyOffset try { endOffset = cleaner.clean(cleanable) - logStats(cleaner.id, cleanable.log.name, cleanable.firstDirtyOffset, endOffset, cleaner.stats) + recordStats(cleaner.id, cleanable.log.name, cleanable.firstDirtyOffset, endOffset, cleaner.stats) } catch { case pe: LogCleaningAbortedException => // task can be aborted, let it go. } finally { @@ -182,10 +224,12 @@ class LogCleaner(val config: CleanerConfig, /** * Log out statistics on a single run of the cleaner. 
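// Illustrative sketch (not from the patch): the interruptible back-off used by the cleaner
// thread above -- wait on a CountDownLatch with a timeout instead of sleeping, so that
// shutdown() can open the latch and wake the thread immediately. Names are hypothetical.
import java.util.concurrent.atomic.AtomicBoolean
import java.util.concurrent.{CountDownLatch, TimeUnit}

class BackoffWorker(backOffMs: Long) extends Thread("backoff-worker") {
  private val running          = new AtomicBoolean(true)
  private val backOffWaitLatch = new CountDownLatch(1)

  override def run(): Unit = {
    while (running.get()) {
      // ... look for cleanable work here; when there is none, back off without blocking shutdown
      backOffWaitLatch.await(backOffMs, TimeUnit.MILLISECONDS)
    }
  }

  def shutdown(): Unit = {
    running.set(false)
    backOffWaitLatch.countDown()  // wake the thread if it is mid back-off
    join()
  }
}

object BackoffWorkerDemo {
  def main(args: Array[String]): Unit = {
    val worker = new BackoffWorker(backOffMs = 60000)
    worker.start()
    worker.shutdown()  // returns promptly even though the back-off is 60 seconds
    println("worker stopped")
  }
}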
*/ - def logStats(id: Int, name: String, from: Long, to: Long, stats: CleanerStats) { + def recordStats(id: Int, name: String, from: Long, to: Long, stats: CleanerStats) { + this.lastStats = stats + cleaner.statsUnderlying.swap def mb(bytes: Double) = bytes / (1024*1024) val message = - "%n\tLog cleaner %d cleaned log %s (dirty section = [%d, %d])%n".format(id, name, from, to) + + "%n\tLog cleaner thread %d cleaned log %s (dirty section = [%d, %d])%n".format(id, name, from, to) + "\t%,.1f MB of log processed in %,.1f seconds (%,.1f MB/sec).%n".format(mb(stats.bytesRead), stats.elapsedSecs, mb(stats.bytesRead/stats.elapsedSecs)) + @@ -193,6 +237,7 @@ class LogCleaner(val config: CleanerConfig, stats.elapsedIndexSecs, mb(stats.mapBytesRead)/stats.elapsedIndexSecs, 100 * stats.elapsedIndexSecs.toDouble/stats.elapsedSecs) + + "\tBuffer utilization: %.1f%%%n".format(100 * stats.bufferUtilization) + "\tCleaned %,.1f MB in %.1f seconds (%,.1f Mb/sec, %.1f%% of total time)%n".format(mb(stats.bytesRead), stats.elapsedSecs - stats.elapsedIndexSecs, mb(stats.bytesRead)/(stats.elapsedSecs - stats.elapsedIndexSecs), 100 * (stats.elapsedSecs - stats.elapsedIndexSecs).toDouble/stats.elapsedSecs) + @@ -215,19 +260,22 @@ class LogCleaner(val config: CleanerConfig, * @param time The time instance */ private[log] class Cleaner(val id: Int, - offsetMap: OffsetMap, + val offsetMap: OffsetMap, ioBufferSize: Int, maxIoBufferSize: Int, dupBufferLoadFactor: Double, throttler: Throttler, time: Time, checkDone: (TopicAndPartition) => Unit) extends Logging { + + override val loggerName = classOf[LogCleaner].getName this.logIdent = "Cleaner " + id + ": " - /* stats on this cleaning */ - val stats = new CleanerStats(time) - + /* cleaning stats - one instance for the current (or next) cleaning cycle and one for the last completed cycle */ + val statsUnderlying = (new CleanerStats(time), new CleanerStats(time)) + def stats = statsUnderlying._1 + /* buffer used for read i/o */ private var readBuffer = ByteBuffer.allocate(ioBufferSize) @@ -264,8 +312,12 @@ private[log] class Cleaner(val id: Int, info("Cleaning log %s (discarding tombstones prior to %s)...".format(log.name, new Date(deleteHorizonMs))) for (group <- groupSegmentsBySize(log.logSegments(0, endOffset), log.config.segmentSize, log.config.maxIndexSize)) cleanSegments(log, group, offsetMap, deleteHorizonMs) + + // record buffer utilization + stats.bufferUtilization = offsetMap.utilization stats.allDone() + endOffset } @@ -275,7 +327,6 @@ private[log] class Cleaner(val id: Int, * @param log The log being cleaned * @param segments The group of segments being cleaned * @param map The offset map to use for cleaning segments - * @param expectedTruncateCount A count used to check if the log is being truncated and rewritten under our feet * @param deleteHorizonMs The time to retain delete tombstones */ private[log] def cleanSegments(log: Log, @@ -289,7 +340,7 @@ private[log] class Cleaner(val id: Int, indexFile.delete() val messages = new FileMessageSet(logFile) val index = new OffsetIndex(indexFile, segments.head.baseOffset, segments.head.index.maxIndexSize) - val cleaned = new LogSegment(messages, index, segments.head.baseOffset, segments.head.indexIntervalBytes, time) + val cleaned = new LogSegment(messages, index, segments.head.baseOffset, segments.head.indexIntervalBytes, log.config.randomSegmentJitter, time) try { // clean segments into the new destination segment @@ -499,6 +550,7 @@ private[log] class Cleaner(val id: Int, */ private case class CleanerStats(time: 
Time = SystemTime) { var startTime, mapCompleteTime, endTime, bytesRead, bytesWritten, mapBytesRead, mapMessagesRead, messagesRead, messagesWritten = 0L + var bufferUtilization = 0.0d clear() def readMessage(size: Int) { @@ -538,6 +590,7 @@ private case class CleanerStats(time: Time = SystemTime) { mapMessagesRead = 0L messagesRead = 0L messagesWritten = 0L + bufferUtilization = 0.0d } } diff --git a/core/src/main/scala/kafka/log/LogCleanerManager.scala b/core/src/main/scala/kafka/log/LogCleanerManager.scala index 1612c8dbcaba8..bcfef77ed53f9 100644 --- a/core/src/main/scala/kafka/log/LogCleanerManager.scala +++ b/core/src/main/scala/kafka/log/LogCleanerManager.scala @@ -18,6 +18,8 @@ package kafka.log import java.io.File +import kafka.metrics.KafkaMetricsGroup +import com.yammer.metrics.core.Gauge import kafka.utils.{Logging, Pool} import kafka.server.OffsetCheckpoint import collection.mutable @@ -39,7 +41,10 @@ private[log] case object LogCleaningPaused extends LogCleaningState * While a partition is in the LogCleaningPaused state, it won't be scheduled for cleaning again, until cleaning is * requested to be resumed. */ -private[log] class LogCleanerManager(val logDirs: Array[File], val logs: Pool[TopicAndPartition, Log]) extends Logging { +private[log] class LogCleanerManager(val logDirs: Array[File], val logs: Pool[TopicAndPartition, Log]) extends Logging with KafkaMetricsGroup { + + override val loggerName = classOf[LogCleaner].getName + /* the offset checkpoints holding the last cleaned point for each log */ private val checkpoints = logDirs.map(dir => (dir, new OffsetCheckpoint(new File(dir, "cleaner-offset-checkpoint")))).toMap @@ -48,8 +53,13 @@ private[log] class LogCleanerManager(val logDirs: Array[File], val logs: Pool[To /* a global lock used to control all access to the in-progress set and the offset checkpoints */ private val lock = new ReentrantLock + /* for coordinating the pausing and the cleaning of a partition */ private val pausedCleaningCond = lock.newCondition() + + /* a gauge for tracking the cleanable ratio of the dirtiest log */ + @volatile private var dirtiestLogCleanableRatio = 0.0 + newGauge("max-dirty-percent", new Gauge[Int] { def value = (100 * dirtiestLogCleanableRatio).toInt }) /** * @return the position processed for all logs. 
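The max-dirty-percent gauge above exposes the cleanable ratio of the dirtiest log as a whole-number percentage, refreshed on each pass of grabFilthiestLog() below. A small sketch of that computation, assuming the ratio is the fraction of a log's bytes that lie beyond its last-cleaned checkpoint (LogToClean itself is defined outside this hunk):

    case class DirtyLog(totalBytes: Long, cleanableBytes: Long) {
      def cleanableRatio: Double = cleanableBytes.toDouble / totalBytes
    }

    val candidates = Seq(DirtyLog(totalBytes = 1000, cleanableBytes = 900),
                         DirtyLog(totalBytes = 1000, cleanableBytes = 200))

    // the gauge reports the dirtiest candidate, here 90
    val maxDirtyPercent = (100 * candidates.map(_.cleanableRatio).max).toInt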
@@ -65,15 +75,35 @@ private[log] class LogCleanerManager(val logDirs: Array[File], val logs: Pool[To def grabFilthiestLog(): Option[LogToClean] = { inLock(lock) { val lastClean = allCleanerCheckpoints() - val cleanableLogs = logs.filter(l => l._2.config.dedupe) // skip any logs marked for delete rather than dedupe - .filterNot(l => inProgress.contains(l._1)) // skip any logs already in-progress - .map(l => LogToClean(l._1, l._2, lastClean.getOrElse(l._1, 0))) // create a LogToClean instance for each - val dirtyLogs = cleanableLogs.filter(l => l.totalBytes > 0) // must have some bytes - .filter(l => l.cleanableRatio > l.log.config.minCleanableRatio) // and must meet the minimum threshold for dirty byte ratio - if(dirtyLogs.isEmpty) { + val dirtyLogs = logs.filter { + case (topicAndPartition, log) => log.config.compact // skip any logs marked for delete rather than dedupe + }.filterNot { + case (topicAndPartition, log) => inProgress.contains(topicAndPartition) // skip any logs already in-progress + }.map { + case (topicAndPartition, log) => // create a LogToClean instance for each + // if the log segments are abnormally truncated and hence the checkpointed offset + // is no longer valid, reset to the log starting offset and log the error event + val logStartOffset = log.logSegments.head.baseOffset + val firstDirtyOffset = { + val offset = lastClean.getOrElse(topicAndPartition, logStartOffset) + if (offset < logStartOffset) { + error("Resetting first dirty offset to log start offset %d since the checkpointed offset %d is invalid." + .format(logStartOffset, offset)) + logStartOffset + } else { + offset + } + } + LogToClean(topicAndPartition, log, firstDirtyOffset) + }.filter(ltc => ltc.totalBytes > 0) // skip any empty logs + + this.dirtiestLogCleanableRatio = if (!dirtyLogs.isEmpty) dirtyLogs.max.cleanableRatio else 0 + // and must meet the minimum threshold for dirty byte ratio + val cleanableLogs = dirtyLogs.filter(ltc => ltc.cleanableRatio > ltc.log.config.minCleanableRatio) + if(cleanableLogs.isEmpty) { None } else { - val filthiest = dirtyLogs.max + val filthiest = cleanableLogs.max inProgress.put(filthiest.topicPartition, LogCleaningInProgress) Some(filthiest) } @@ -113,7 +143,8 @@ private[log] class LogCleanerManager(val logDirs: Array[File], val logs: Pool[To case LogCleaningInProgress => inProgress.put(topicAndPartition, LogCleaningAborted) case s => - throw new IllegalStateException(("Partiiton %s can't be aborted and pasued since it's in %s state").format(topicAndPartition, s)) + throw new IllegalStateException("Compaction for partition %s cannot be aborted and paused since it is in %s state." + .format(topicAndPartition, s)) } } while (!isCleaningInState(topicAndPartition, LogCleaningPaused)) @@ -129,17 +160,19 @@ private[log] class LogCleanerManager(val logDirs: Array[File], val logs: Pool[To inLock(lock) { inProgress.get(topicAndPartition) match { case None => - throw new IllegalStateException(("Partiiton %s can't be resumed since it's never paused").format(topicAndPartition)) + throw new IllegalStateException("Compaction for partition %s cannot be resumed since it is not paused." + .format(topicAndPartition)) case Some(state) => state match { case LogCleaningPaused => inProgress.remove(topicAndPartition) case s => - throw new IllegalStateException(("Partiiton %s can't be resumed since it's in %s state").format(topicAndPartition, s)) + throw new IllegalStateException("Compaction for partition %s cannot be resumed since it is in %s state." 
+ .format(topicAndPartition, s)) } } } - info("The cleaning for partition %s is resumed".format(topicAndPartition)) + info("Compaction for partition %s is resumed".format(topicAndPartition)) } /** @@ -181,7 +214,7 @@ private[log] class LogCleanerManager(val logDirs: Array[File], val logs: Pool[To inProgress.put(topicAndPartition, LogCleaningPaused) pausedCleaningCond.signalAll() case s => - throw new IllegalStateException(("In-progress partiiton %s can't be in %s state").format(topicAndPartition, s)) + throw new IllegalStateException("In-progress partition %s cannot be in %s state.".format(topicAndPartition, s)) } } } diff --git a/core/src/main/scala/kafka/log/LogConfig.scala b/core/src/main/scala/kafka/log/LogConfig.scala index 0b32aeeffcd9d..ca7a99e99f641 100644 --- a/core/src/main/scala/kafka/log/LogConfig.scala +++ b/core/src/main/scala/kafka/log/LogConfig.scala @@ -18,43 +18,72 @@ package kafka.log import java.util.Properties +import org.apache.kafka.common.utils.Utils + import scala.collection._ -import kafka.common._ +import org.apache.kafka.common.config.ConfigDef + +object Defaults { + val SegmentSize = 1024 * 1024 + val SegmentMs = Long.MaxValue + val SegmentJitterMs = 0L + val FlushInterval = Long.MaxValue + val FlushMs = Long.MaxValue + val RetentionSize = Long.MaxValue + val RetentionMs = Long.MaxValue + val MaxMessageSize = Int.MaxValue + val MaxIndexSize = 1024 * 1024 + val IndexInterval = 4096 + val FileDeleteDelayMs = 60 * 1000L + val DeleteRetentionMs = 24 * 60 * 60 * 1000L + val MinCleanableDirtyRatio = 0.5 + val Compact = false + val UncleanLeaderElectionEnable = true + val MinInSyncReplicas = 1 +} /** * Configuration settings for a log - * @param segmentSize The soft maximum for the size of a segment file in the log + * @param segmentSize The hard maximum for the size of a segment file in the log * @param segmentMs The soft maximum on the amount of time before a new log segment is rolled + * @param segmentJitterMs The maximum random jitter subtracted from segmentMs to avoid thundering herds of segment rolling * @param flushInterval The number of messages that can be written to the log before a flush is forced * @param flushMs The amount of time the log can have dirty data before a flush is forced * @param retentionSize The approximate total number of bytes this log can use - * @param retentionMs The age approximate maximum age of the last segment that is retained + * @param retentionMs The approximate maximum age of the last segment that is retained * @param maxIndexSize The maximum size of an index file * @param indexInterval The approximate number of bytes between index entries * @param fileDeleteDelayMs The time to wait before deleting a file from the filesystem * @param deleteRetentionMs The time to retain delete markers in the log. Only applicable for logs that are being compacted. * @param minCleanableRatio The ratio of bytes that are available for cleaning to the bytes already cleaned - * @param dedupe Should old segments in this log be deleted or deduplicated? + * @param compact Should old segments in this log be deleted or deduplicated? 
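The improved error messages above refer to a small per-partition cleaning state machine. A sketch of the states and the transitions the manager enforces, inferred from the surrounding hunks:

    sealed trait CleaningState
    case object InProgress extends CleaningState  // set by grabFilthiestLog() when a log is picked
    case object Aborted    extends CleaningState  // abortAndPauseCleaning() on an in-progress partition
    case object Paused     extends CleaningState  // the cleaner thread acknowledges the abort
    // resumeCleaning() is only legal from Paused and simply clears the entry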
+ * @param uncleanLeaderElectionEnable Indicates whether unclean leader election is enabled + * @param minInSyncReplicas If number of insync replicas drops below this number, we stop accepting writes with -1 (or all) required acks + * */ -case class LogConfig(val segmentSize: Int = 1024*1024, - val segmentMs: Long = Long.MaxValue, - val flushInterval: Long = Long.MaxValue, - val flushMs: Long = Long.MaxValue, - val retentionSize: Long = Long.MaxValue, - val retentionMs: Long = Long.MaxValue, - val maxMessageSize: Int = Int.MaxValue, - val maxIndexSize: Int = 1024*1024, - val indexInterval: Int = 4096, - val fileDeleteDelayMs: Long = 60*1000, - val deleteRetentionMs: Long = 24 * 60 * 60 * 1000L, - val minCleanableRatio: Double = 0.5, - val dedupe: Boolean = false) { - +case class LogConfig(val segmentSize: Int = Defaults.SegmentSize, + val segmentMs: Long = Defaults.SegmentMs, + val segmentJitterMs: Long = Defaults.SegmentJitterMs, + val flushInterval: Long = Defaults.FlushInterval, + val flushMs: Long = Defaults.FlushMs, + val retentionSize: Long = Defaults.RetentionSize, + val retentionMs: Long = Defaults.RetentionMs, + val maxMessageSize: Int = Defaults.MaxMessageSize, + val maxIndexSize: Int = Defaults.MaxIndexSize, + val indexInterval: Int = Defaults.IndexInterval, + val fileDeleteDelayMs: Long = Defaults.FileDeleteDelayMs, + val deleteRetentionMs: Long = Defaults.DeleteRetentionMs, + val minCleanableRatio: Double = Defaults.MinCleanableDirtyRatio, + val compact: Boolean = Defaults.Compact, + val uncleanLeaderElectionEnable: Boolean = Defaults.UncleanLeaderElectionEnable, + val minInSyncReplicas: Int = Defaults.MinInSyncReplicas) { + def toProps: Properties = { val props = new Properties() import LogConfig._ props.put(SegmentBytesProp, segmentSize.toString) props.put(SegmentMsProp, segmentMs.toString) + props.put(SegmentJitterMsProp, segmentJitterMs.toString) props.put(SegmentIndexBytesProp, maxIndexSize.toString) props.put(FlushMessagesProp, flushInterval.toString) props.put(FlushMsProp, flushMs.toString) @@ -65,15 +94,24 @@ case class LogConfig(val segmentSize: Int = 1024*1024, props.put(DeleteRetentionMsProp, deleteRetentionMs.toString) props.put(FileDeleteDelayMsProp, fileDeleteDelayMs.toString) props.put(MinCleanableDirtyRatioProp, minCleanableRatio.toString) - props.put(CleanupPolicyProp, if(dedupe) "dedupe" else "delete") + props.put(CleanupPolicyProp, if(compact) "compact" else "delete") + props.put(UncleanLeaderElectionEnableProp, uncleanLeaderElectionEnable.toString) + props.put(MinInSyncReplicasProp, minInSyncReplicas.toString) props } - + + def randomSegmentJitter: Long = + if (segmentJitterMs == 0) 0 else Utils.abs(scala.util.Random.nextInt()) % math.min(segmentJitterMs, segmentMs) } object LogConfig { + + val Delete = "delete" + val Compact = "compact" + val SegmentBytesProp = "segment.bytes" val SegmentMsProp = "segment.ms" + val SegmentJitterMsProp = "segment.jitter.ms" val SegmentIndexBytesProp = "segment.index.bytes" val FlushMessagesProp = "flush.messages" val FlushMsProp = "flush.ms" @@ -85,41 +123,88 @@ object LogConfig { val FileDeleteDelayMsProp = "file.delete.delay.ms" val MinCleanableDirtyRatioProp = "min.cleanable.dirty.ratio" val CleanupPolicyProp = "cleanup.policy" - - val ConfigNames = Set(SegmentBytesProp, - SegmentMsProp, - SegmentIndexBytesProp, - FlushMessagesProp, - FlushMsProp, - RetentionBytesProp, - RententionMsProp, - MaxMessageBytesProp, - IndexIntervalBytesProp, - FileDeleteDelayMsProp, - DeleteRetentionMsProp, - MinCleanableDirtyRatioProp, - 
CleanupPolicyProp) - - + val UncleanLeaderElectionEnableProp = "unclean.leader.election.enable" + val MinInSyncReplicasProp = "min.insync.replicas" + + val SegmentSizeDoc = "The hard maximum for the size of a segment file in the log" + val SegmentMsDoc = "The soft maximum on the amount of time before a new log segment is rolled" + val SegmentJitterMsDoc = "The maximum random jitter subtracted from segmentMs to avoid thundering herds of segment" + + " rolling" + val FlushIntervalDoc = "The number of messages that can be written to the log before a flush is forced" + val FlushMsDoc = "The amount of time the log can have dirty data before a flush is forced" + val RetentionSizeDoc = "The approximate total number of bytes this log can use" + val RetentionMsDoc = "The approximate maximum age of the last segment that is retained" + val MaxIndexSizeDoc = "The maximum size of an index file" + val MaxMessageSizeDoc = "The maximum size of a message" + val IndexIntervalDoc = "The approximate number of bytes between index entries" + val FileDeleteDelayMsDoc = "The time to wait before deleting a file from the filesystem" + val DeleteRetentionMsDoc = "The time to retain delete markers in the log. Only applicable for logs that are being" + + " compacted." + val MinCleanableRatioDoc = "The ratio of bytes that are available for cleaning to the bytes already cleaned" + val CompactDoc = "Should old segments in this log be deleted or deduplicated?" + val UncleanLeaderElectionEnableDoc = "Indicates whether unclean leader election is enabled" + val MinInSyncReplicasDoc = "If number of insync replicas drops below this number, we stop accepting writes with" + + " -1 (or all) required acks" + + private val configDef = { + import ConfigDef.Range._ + import ConfigDef.ValidString._ + import ConfigDef.Type._ + import ConfigDef.Importance._ + import java.util.Arrays.asList + + new ConfigDef() + .define(SegmentBytesProp, INT, Defaults.SegmentSize, atLeast(0), MEDIUM, SegmentSizeDoc) + .define(SegmentMsProp, LONG, Defaults.SegmentMs, atLeast(0), MEDIUM, SegmentMsDoc) + .define(SegmentJitterMsProp, LONG, Defaults.SegmentJitterMs, atLeast(0), MEDIUM, SegmentJitterMsDoc) + .define(SegmentIndexBytesProp, INT, Defaults.MaxIndexSize, atLeast(0), MEDIUM, MaxIndexSizeDoc) + .define(FlushMessagesProp, LONG, Defaults.FlushInterval, atLeast(0), MEDIUM, FlushIntervalDoc) + .define(FlushMsProp, LONG, Defaults.FlushMs, atLeast(0), MEDIUM, FlushMsDoc) + // can be negative. 
See kafka.log.LogManager.cleanupSegmentsToMaintainSize + .define(RetentionBytesProp, LONG, Defaults.RetentionSize, MEDIUM, RetentionSizeDoc) + .define(RententionMsProp, LONG, Defaults.RetentionMs, atLeast(0), MEDIUM, RetentionMsDoc) + .define(MaxMessageBytesProp, INT, Defaults.MaxMessageSize, atLeast(0), MEDIUM, MaxMessageSizeDoc) + .define(IndexIntervalBytesProp, INT, Defaults.IndexInterval, atLeast(0), MEDIUM, IndexIntervalDoc) + .define(DeleteRetentionMsProp, LONG, Defaults.DeleteRetentionMs, atLeast(0), MEDIUM, DeleteRetentionMsDoc) + .define(FileDeleteDelayMsProp, LONG, Defaults.FileDeleteDelayMs, atLeast(0), MEDIUM, FileDeleteDelayMsDoc) + .define(MinCleanableDirtyRatioProp, DOUBLE, Defaults.MinCleanableDirtyRatio, between(0, 1), MEDIUM, + MinCleanableRatioDoc) + .define(CleanupPolicyProp, STRING, if (Defaults.Compact) Compact else Delete, in(asList(Compact, Delete)), MEDIUM, + CompactDoc) + .define(UncleanLeaderElectionEnableProp, BOOLEAN, Defaults.UncleanLeaderElectionEnable, + MEDIUM, UncleanLeaderElectionEnableDoc) + .define(MinInSyncReplicasProp, INT, Defaults.MinInSyncReplicas, atLeast(1), MEDIUM, MinInSyncReplicasDoc) + } + + def configNames() = { + import JavaConversions._ + configDef.names().toList.sorted + } + /** * Parse the given properties instance into a LogConfig object */ def fromProps(props: Properties): LogConfig = { - new LogConfig(segmentSize = props.getProperty(SegmentBytesProp).toInt, - segmentMs = props.getProperty(SegmentMsProp).toLong, - maxIndexSize = props.getProperty(SegmentIndexBytesProp).toInt, - flushInterval = props.getProperty(FlushMessagesProp).toLong, - flushMs = props.getProperty(FlushMsProp).toLong, - retentionSize = props.getProperty(RetentionBytesProp).toLong, - retentionMs = props.getProperty(RententionMsProp).toLong, - maxMessageSize = props.getProperty(MaxMessageBytesProp).toInt, - indexInterval = props.getProperty(IndexIntervalBytesProp).toInt, - fileDeleteDelayMs = props.getProperty(FileDeleteDelayMsProp).toInt, - deleteRetentionMs = props.getProperty(DeleteRetentionMsProp).toLong, - minCleanableRatio = props.getProperty(MinCleanableDirtyRatioProp).toDouble, - dedupe = props.getProperty(CleanupPolicyProp).trim.toLowerCase == "dedupe") + import kafka.utils.Utils.evaluateDefaults + val parsed = configDef.parse(evaluateDefaults(props)) + new LogConfig(segmentSize = parsed.get(SegmentBytesProp).asInstanceOf[Int], + segmentMs = parsed.get(SegmentMsProp).asInstanceOf[Long], + segmentJitterMs = parsed.get(SegmentJitterMsProp).asInstanceOf[Long], + maxIndexSize = parsed.get(SegmentIndexBytesProp).asInstanceOf[Int], + flushInterval = parsed.get(FlushMessagesProp).asInstanceOf[Long], + flushMs = parsed.get(FlushMsProp).asInstanceOf[Long], + retentionSize = parsed.get(RetentionBytesProp).asInstanceOf[Long], + retentionMs = parsed.get(RententionMsProp).asInstanceOf[Long], + maxMessageSize = parsed.get(MaxMessageBytesProp).asInstanceOf[Int], + indexInterval = parsed.get(IndexIntervalBytesProp).asInstanceOf[Int], + fileDeleteDelayMs = parsed.get(FileDeleteDelayMsProp).asInstanceOf[Long], + deleteRetentionMs = parsed.get(DeleteRetentionMsProp).asInstanceOf[Long], + minCleanableRatio = parsed.get(MinCleanableDirtyRatioProp).asInstanceOf[Double], + compact = parsed.get(CleanupPolicyProp).asInstanceOf[String].toLowerCase != Delete, + uncleanLeaderElectionEnable = parsed.get(UncleanLeaderElectionEnableProp).asInstanceOf[Boolean], + minInSyncReplicas = parsed.get(MinInSyncReplicasProp).asInstanceOf[Int]) } - + /** * Create a log config instance using the given 
properties and defaults */ @@ -128,24 +213,23 @@ object LogConfig { props.putAll(overrides) fromProps(props) } - + /** * Check that property names are valid */ def validateNames(props: Properties) { import JavaConversions._ + val names = configDef.names() for(name <- props.keys) - require(LogConfig.ConfigNames.contains(name), "Unknown configuration \"%s\".".format(name)) + require(names.contains(name), "Unknown configuration \"%s\".".format(name)) } - + /** - * Check that the given properties contain only valid log config names, and that all values can be parsed. + * Check that the given properties contain only valid log config names and that all values can be parsed and are valid */ def validate(props: Properties) { validateNames(props) - LogConfig.fromProps(LogConfig().toProps, props) // check that we can parse the values + configDef.parse(props) } - + } - - \ No newline at end of file diff --git a/core/src/main/scala/kafka/log/LogManager.scala b/core/src/main/scala/kafka/log/LogManager.scala index 10062af1e02af..4ebaae00ca4b8 100644 --- a/core/src/main/scala/kafka/log/LogManager.scala +++ b/core/src/main/scala/kafka/log/LogManager.scala @@ -22,7 +22,8 @@ import java.util.concurrent.TimeUnit import kafka.utils._ import scala.collection._ import kafka.common.{TopicAndPartition, KafkaException} -import kafka.server.OffsetCheckpoint +import kafka.server.{RecoveringFromUncleanShutdown, BrokerState, OffsetCheckpoint} +import java.util.concurrent.{Executors, ExecutorService, ExecutionException, Future} /** * The entry point to the kafka log management subsystem. The log manager is responsible for log creation, retrieval, and cleaning. @@ -39,12 +40,13 @@ class LogManager(val logDirs: Array[File], val topicConfigs: Map[String, LogConfig], val defaultConfig: LogConfig, val cleanerConfig: CleanerConfig, + ioThreads: Int, val flushCheckMs: Long, val flushCheckpointMs: Long, val retentionCheckMs: Long, scheduler: Scheduler, + val brokerState: BrokerState, private val time: Time) extends Logging { - val RecoveryPointCheckpointFile = "recovery-point-offset-checkpoint" val LockFile = ".lock" val InitialTaskDelayMs = 30*1000 @@ -52,9 +54,9 @@ class LogManager(val logDirs: Array[File], private val logs = new Pool[TopicAndPartition, Log]() createAndValidateLogDirs(logDirs) - private var dirLocks = lockLogDirs(logDirs) + private val dirLocks = lockLogDirs(logDirs) private val recoveryPointCheckpoints = logDirs.map(dir => (dir, new OffsetCheckpoint(new File(dir, RecoveryPointCheckpointFile)))).toMap - loadLogs(logDirs) + loadLogs() private val cleaner: LogCleaner = if(cleanerConfig.enableCleaner) @@ -101,33 +103,71 @@ class LogManager(val logDirs: Array[File], /** * Recover and load all logs in the given data directories */ - private def loadLogs(dirs: Seq[File]) { - for(dir <- dirs) { + private def loadLogs(): Unit = { + info("Loading logs.") + + val threadPools = mutable.ArrayBuffer.empty[ExecutorService] + val jobs = mutable.Map.empty[File, Seq[Future[_]]] + + for (dir <- this.logDirs) { + val pool = Executors.newFixedThreadPool(ioThreads) + threadPools.append(pool) + + val cleanShutdownFile = new File(dir, Log.CleanShutdownFile) + + if (cleanShutdownFile.exists) { + debug( + "Found clean shutdown file. 
" + + "Skipping recovery for all logs in data directory: " + + dir.getAbsolutePath) + } else { + // log recovery itself is being performed by `Log` class during initialization + brokerState.newState(RecoveringFromUncleanShutdown) + } + val recoveryPoints = this.recoveryPointCheckpoints(dir).read - /* load the logs */ - val subDirs = dir.listFiles() - if(subDirs != null) { - val cleanShutDownFile = new File(dir, Log.CleanShutdownFile) - if(cleanShutDownFile.exists()) - info("Found clean shutdown file. Skipping recovery for all logs in data directory '%s'".format(dir.getAbsolutePath)) - for(dir <- subDirs) { - if(dir.isDirectory) { - info("Loading log '" + dir.getName + "'") - val topicPartition = Log.parseTopicPartitionName(dir.getName) - val config = topicConfigs.getOrElse(topicPartition.topic, defaultConfig) - val log = new Log(dir, - config, - recoveryPoints.getOrElse(topicPartition, 0L), - scheduler, - time) - val previous = this.logs.put(topicPartition, log) - if(previous != null) - throw new IllegalArgumentException("Duplicate log directories found: %s, %s!".format(log.dir.getAbsolutePath, previous.dir.getAbsolutePath)) + + val jobsForDir = for { + dirContent <- Option(dir.listFiles).toList + logDir <- dirContent if logDir.isDirectory + } yield { + Utils.runnable { + debug("Loading log '" + logDir.getName + "'") + + val topicPartition = Log.parseTopicPartitionName(logDir) + val config = topicConfigs.getOrElse(topicPartition.topic, defaultConfig) + val logRecoveryPoint = recoveryPoints.getOrElse(topicPartition, 0L) + + val current = new Log(logDir, config, logRecoveryPoint, scheduler, time) + val previous = this.logs.put(topicPartition, current) + + if (previous != null) { + throw new IllegalArgumentException( + "Duplicate log directories found: %s, %s!".format( + current.dir.getAbsolutePath, previous.dir.getAbsolutePath)) } } - cleanShutDownFile.delete() } + + jobs(cleanShutdownFile) = jobsForDir.map(pool.submit).toSeq + } + + + try { + for ((cleanShutdownFile, dirJobs) <- jobs) { + dirJobs.foreach(_.get) + cleanShutdownFile.delete() + } + } catch { + case e: ExecutionException => { + error("There was an error in one of the threads during logs loading: " + e.getCause) + throw e.getCause + } + } finally { + threadPools.foreach(_.shutdown()) } + + info("Logs loading complete.") } /** @@ -157,31 +197,69 @@ class LogManager(val logDirs: Array[File], if(cleanerConfig.enableCleaner) cleaner.startup() } - + /** * Close all the logs */ def shutdown() { - debug("Shutting down.") + info("Shutting down.") + + val threadPools = mutable.ArrayBuffer.empty[ExecutorService] + val jobs = mutable.Map.empty[File, Seq[Future[_]]] + + // stop the cleaner first + if (cleaner != null) { + Utils.swallow(cleaner.shutdown()) + } + + // close logs in each dir + for (dir <- this.logDirs) { + debug("Flushing and closing logs at " + dir) + + val pool = Executors.newFixedThreadPool(ioThreads) + threadPools.append(pool) + + val logsInDir = logsByDir.getOrElse(dir.toString, Map()).values + + val jobsForDir = logsInDir map { log => + Utils.runnable { + // flush the log to ensure latest possible recovery point + log.flush() + log.close() + } + } + + jobs(dir) = jobsForDir.map(pool.submit).toSeq + } + + try { - // stop the cleaner first - if(cleaner != null) - Utils.swallow(cleaner.shutdown()) - // flush the logs to ensure latest possible recovery point - allLogs.foreach(_.flush()) - // close the logs - allLogs.foreach(_.close()) - // update the last flush point - checkpointRecoveryPointOffsets() - // mark that the 
shutdown was clean by creating the clean shutdown marker file - logDirs.foreach(dir => Utils.swallow(new File(dir, Log.CleanShutdownFile).createNewFile())) + for ((dir, dirJobs) <- jobs) { + dirJobs.foreach(_.get) + + // update the last flush point + debug("Updating recovery points at " + dir) + checkpointLogsInDir(dir) + + // mark that the shutdown was clean by creating marker file + debug("Writing clean shutdown marker at " + dir) + Utils.swallow(new File(dir, Log.CleanShutdownFile).createNewFile()) + } + } catch { + case e: ExecutionException => { + error("There was an error in one of the threads during LogManager shutdown: " + e.getCause) + throw e.getCause + } } finally { + threadPools.foreach(_.shutdown()) // regardless of whether the close succeeded, we need to unlock the data directories dirLocks.foreach(_.destroy()) } - debug("Shutdown complete.") + + info("Shutdown complete.") } + /** * Truncate the partition logs to the specified offsets and checkpoint the recovery point to this offset * @@ -227,14 +305,19 @@ class LogManager(val logDirs: Array[File], * to avoid recovering the whole log on startup. */ def checkpointRecoveryPointOffsets() { - val recoveryPointsByDir = this.logsByTopicPartition.groupBy(_._2.dir.getParent.toString) - for(dir <- logDirs) { - val recoveryPoints = recoveryPointsByDir.get(dir.toString) - if(recoveryPoints.isDefined) - this.recoveryPointCheckpoints(dir).write(recoveryPoints.get.mapValues(_.recoveryPoint)) + this.logDirs.foreach(checkpointLogsInDir) + } + + /** + * Make a checkpoint for all logs in provided directory. + */ + private def checkpointLogsInDir(dir: File): Unit = { + val recoveryPoints = this.logsByDir.get(dir.toString) + if (recoveryPoints.isDefined) { + this.recoveryPointCheckpoints(dir).write(recoveryPoints.get.mapValues(_.recoveryPoint)) } } - + /** * Get the log if it exists, otherwise return None */ @@ -351,7 +434,7 @@ class LogManager(val logDirs: Array[File], debug("Beginning log cleanup...") var total = 0 val startMs = time.milliseconds - for(log <- allLogs; if !log.config.dedupe) { + for(log <- allLogs; if !log.config.compact) { debug("Garbage collecting '" + log.name + "'") total += cleanupExpiredSegments(log) + cleanupSegmentsToMaintainSize(log) } @@ -363,12 +446,21 @@ class LogManager(val logDirs: Array[File], * Get all the partition logs */ def allLogs(): Iterable[Log] = logs.values - + /** * Get a map of TopicAndPartition => Log */ def logsByTopicPartition = logs.toMap + /** + * Map of log dir to logs by topic and partitions in that dir + */ + private def logsByDir = { + this.logsByTopicPartition.groupBy { + case (_, log) => log.dir.getParent + } + } + /** * Flush any log which has exceeded its flush interval and has unwritten messages. */ diff --git a/core/src/main/scala/kafka/log/LogSegment.scala b/core/src/main/scala/kafka/log/LogSegment.scala index 0d6926ea105a9..ac9643423a28d 100644 --- a/core/src/main/scala/kafka/log/LogSegment.scala +++ b/core/src/main/scala/kafka/log/LogSegment.scala @@ -14,15 +14,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package kafka.log +package kafka.log -import scala.math._ -import java.io.File import kafka.message._ import kafka.common._ import kafka.utils._ +import kafka.server.{LogOffsetMetadata, FetchDataInfo} -/** +import scala.math._ +import java.io.File + + + /** * A segment of the log. Each segment has two components: a log and an index. The log is a FileMessageSet containing * the actual messages. 
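Both loadLogs() and shutdown() above now follow the same pattern: a fixed I/O pool per data directory, submitted jobs per log, then the futures are drained and any ExecutionException is unwrapped so the real cause propagates before the pools are shut down. A condensed sketch of that pattern, simplified to a single job per directory:

    import java.util.concurrent.{Executors, ExecutionException, Future}

    def runPerDir(dirs: Seq[String], ioThreads: Int)(job: String => Unit): Unit = {
      val pools = dirs.map(_ => Executors.newFixedThreadPool(ioThreads))
      try {
        val futures: Seq[Future[_]] =
          dirs.zip(pools).map { case (dir, pool) =>
            pool.submit(new Runnable { def run() = job(dir) })
          }
        futures.foreach(_.get)
      } catch {
        // surface the original failure rather than the wrapper
        case e: ExecutionException => throw e.getCause
      } finally {
        pools.foreach(_.shutdown())
      }
    }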
The index is an OffsetIndex that maps from logical offsets to physical file positions. Each * segment has a base offset which is an offset <= the least offset of any message in this segment and > any offset in @@ -41,18 +44,20 @@ class LogSegment(val log: FileMessageSet, val index: OffsetIndex, val baseOffset: Long, val indexIntervalBytes: Int, + val rollJitterMs: Long, time: Time) extends Logging { var created = time.milliseconds - + /* the number of bytes since we last added an entry in the offset index */ private var bytesSinceLastIndexEntry = 0 - def this(dir: File, startOffset: Long, indexIntervalBytes: Int, maxIndexSize: Int, time: Time) = + def this(dir: File, startOffset: Long, indexIntervalBytes: Int, maxIndexSize: Int, rollJitterMs: Long, time: Time) = this(new FileMessageSet(file = Log.logFilename(dir, startOffset)), new OffsetIndex(file = Log.indexFilename(dir, startOffset), baseOffset = startOffset, maxIndexSize = maxIndexSize), startOffset, indexIntervalBytes, + rollJitterMs, time) /* Return the size in bytes of this log segment */ @@ -86,7 +91,7 @@ class LogSegment(val log: FileMessageSet, * Find the physical file position for the first message with offset >= the requested offset. * * The lowerBound argument is an optimization that can be used if we already know a valid starting position - * in the file higher than the greast-lower-bound from the index. + * in the file higher than the greatest-lower-bound from the index. * * @param offset The offset we want to translate * @param startingFilePosition A lower bound on the file position from which to begin the search. This is purely an optimization and @@ -99,7 +104,7 @@ class LogSegment(val log: FileMessageSet, val mapping = index.lookup(offset) log.searchFor(offset, max(mapping.position, startingFilePosition)) } - + /** * Read a message set from this segment beginning with the first offset >= startOffset. The message set will include * no more than maxSize bytes and will end before maxOffset if a maxOffset is specified. @@ -108,22 +113,27 @@ class LogSegment(val log: FileMessageSet, * @param maxSize The maximum number of bytes to include in the message set we read * @param maxOffset An optional maximum offset for the message set we read * - * @return The message set read or null if the startOffset is larger than the largest offset in this log. 
+ * @return The fetched data and the offset metadata of the first message whose offset is >= startOffset, + * or null if the startOffset is larger than the largest offset in this log */ @threadsafe - def read(startOffset: Long, maxOffset: Option[Long], maxSize: Int): MessageSet = { + def read(startOffset: Long, maxOffset: Option[Long], maxSize: Int): FetchDataInfo = { if(maxSize < 0) throw new IllegalArgumentException("Invalid max size for log read (%d)".format(maxSize)) - if(maxSize == 0) - return MessageSet.Empty - + val logSize = log.sizeInBytes // this may change, need to save a consistent copy val startPosition = translateOffset(startOffset) - + // if the start position is already off the end of the log, return null if(startPosition == null) return null - + + val offsetMetadata = new LogOffsetMetadata(startOffset, this.baseOffset, startPosition.position) + + // if the size is zero, still return a log segment but with zero size + if(maxSize == 0) + return FetchDataInfo(offsetMetadata, MessageSet.Empty) + // calculate the length of the message set to read based on whether or not they gave us a maxOffset val length = maxOffset match { @@ -143,7 +153,7 @@ class LogSegment(val log: FileMessageSet, min(endPosition - startPosition.position, maxSize) } } - log.read(startPosition.position, length) + FetchDataInfo(offsetMetadata, log.read(startPosition.position, length)) } /** @@ -222,7 +232,7 @@ class LogSegment(val log: FileMessageSet, if(ms == null) { baseOffset } else { - ms.lastOption match { + ms.messageSet.lastOption match { case None => baseOffset case Some(last) => last.nextOffset } diff --git a/core/src/main/scala/kafka/log/OffsetIndex.scala b/core/src/main/scala/kafka/log/OffsetIndex.scala index 8a62dfad9bc48..1c4c7bd89e19e 100644 --- a/core/src/main/scala/kafka/log/OffsetIndex.scala +++ b/core/src/main/scala/kafka/log/OffsetIndex.scala @@ -346,7 +346,7 @@ class OffsetIndex(@volatile var file: File, val baseOffset: Long, val maxIndexSi /** * Do a basic sanity check on this index to detect obvious problems - * @throw IllegalArgumentException if any problems are found + * @throws IllegalArgumentException if any problems are found */ def sanityCheck() { require(entries == 0 || lastOffset > baseOffset, diff --git a/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala b/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala index 73401c5ff34d0..788c7864bc881 100644 --- a/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala +++ b/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala @@ -17,7 +17,6 @@ package kafka.message -import scala.reflect.BeanProperty import kafka.utils.Logging import java.nio.ByteBuffer import java.nio.channels._ @@ -95,7 +94,7 @@ object ByteBufferMessageSet { * Option 2: Give it a list of messages along with instructions relating to serialization format. Producers will use this method. 
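LogSegment.read now pairs the messages with position metadata for the first resolvable offset, and a zero-byte fetch still reports that position instead of short-circuiting to an empty set. A stand-in sketch of the returned shape (the real FetchDataInfo and LogOffsetMetadata live in kafka.server and their field names may differ):

    // mirrors how LogOffsetMetadata is constructed in read():
    // (message offset, base offset of the segment, byte position within the segment)
    case class OffsetMetadataSketch(messageOffset: Long, segmentBaseOffset: Long, positionInSegment: Int)
    case class FetchDataInfoSketch(offsetMetadata: OffsetMetadataSketch, messageBytes: Array[Byte])

    // a maxSize == 0 read: no message bytes, but the caller still learns where
    // startOffset lives inside the segment
    def emptyRead(startOffset: Long, segmentBaseOffset: Long, position: Int): FetchDataInfoSketch =
      FetchDataInfoSketch(OffsetMetadataSketch(startOffset, segmentBaseOffset, position), Array.empty[Byte])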
* */ -class ByteBufferMessageSet(@BeanProperty val buffer: ByteBuffer) extends MessageSet with Logging { +class ByteBufferMessageSet(val buffer: ByteBuffer) extends MessageSet with Logging { private var shallowValidByteCount = -1 def this(compressionCodec: CompressionCodec, messages: Message*) { @@ -110,6 +109,8 @@ class ByteBufferMessageSet(@BeanProperty val buffer: ByteBuffer) extends Message this(NoCompressionCodec, new AtomicLong(0), messages: _*) } + def getBuffer = buffer + private def shallowValidBytes: Int = { if(shallowValidByteCount < 0) { var bytes = 0 diff --git a/core/src/main/scala/kafka/message/CompressionCodec.scala b/core/src/main/scala/kafka/message/CompressionCodec.scala index 8762a790af2ab..9439d2bc29a0c 100644 --- a/core/src/main/scala/kafka/message/CompressionCodec.scala +++ b/core/src/main/scala/kafka/message/CompressionCodec.scala @@ -23,6 +23,7 @@ object CompressionCodec { case NoCompressionCodec.codec => NoCompressionCodec case GZIPCompressionCodec.codec => GZIPCompressionCodec case SnappyCompressionCodec.codec => SnappyCompressionCodec + case LZ4CompressionCodec.codec => LZ4CompressionCodec case _ => throw new kafka.common.UnknownCodecException("%d is an unknown compression codec".format(codec)) } } @@ -31,6 +32,7 @@ object CompressionCodec { case NoCompressionCodec.name => NoCompressionCodec case GZIPCompressionCodec.name => GZIPCompressionCodec case SnappyCompressionCodec.name => SnappyCompressionCodec + case LZ4CompressionCodec.name => LZ4CompressionCodec case _ => throw new kafka.common.UnknownCodecException("%s is an unknown compression codec".format(name)) } } @@ -53,6 +55,11 @@ case object SnappyCompressionCodec extends CompressionCodec { val name = "snappy" } +case object LZ4CompressionCodec extends CompressionCodec { + val codec = 3 + val name = "lz4" +} + case object NoCompressionCodec extends CompressionCodec { val codec = 0 val name = "none" diff --git a/core/src/main/scala/kafka/message/CompressionFactory.scala b/core/src/main/scala/kafka/message/CompressionFactory.scala index ca833ee317975..c721040bd0420 100644 --- a/core/src/main/scala/kafka/message/CompressionFactory.scala +++ b/core/src/main/scala/kafka/message/CompressionFactory.scala @@ -22,6 +22,8 @@ import java.util.zip.GZIPOutputStream import java.util.zip.GZIPInputStream import java.io.InputStream +import org.apache.kafka.common.message.{KafkaLZ4BlockInputStream, KafkaLZ4BlockOutputStream} + object CompressionFactory { def apply(compressionCodec: CompressionCodec, stream: OutputStream): OutputStream = { @@ -31,6 +33,8 @@ object CompressionFactory { case SnappyCompressionCodec => import org.xerial.snappy.SnappyOutputStream new SnappyOutputStream(stream) + case LZ4CompressionCodec => + new KafkaLZ4BlockOutputStream(stream) case _ => throw new kafka.common.UnknownCodecException("Unknown Codec: " + compressionCodec) } @@ -43,6 +47,8 @@ object CompressionFactory { case SnappyCompressionCodec => import org.xerial.snappy.SnappyInputStream new SnappyInputStream(stream) + case LZ4CompressionCodec => + new KafkaLZ4BlockInputStream(stream) case _ => throw new kafka.common.UnknownCodecException("Unknown Codec: " + compressionCodec) } diff --git a/core/src/main/scala/kafka/message/Message.scala b/core/src/main/scala/kafka/message/Message.scala index 52c082f5213ba..d3024807a84e1 100644 --- a/core/src/main/scala/kafka/message/Message.scala +++ b/core/src/main/scala/kafka/message/Message.scala @@ -54,10 +54,10 @@ object Message { val CurrentMagicValue: Byte = 0 /** - * Specifies the mask for the 
compression code. 2 bits to hold the compression codec. + * Specifies the mask for the compression code. 3 bits to hold the compression codec. * 0 is reserved to indicate no compression */ - val CompressionCodeMask: Int = 0x03 + val CompressionCodeMask: Int = 0x07 /** * Compression code for uncompressed messages @@ -70,7 +70,7 @@ object Message { * A message. The format of an N byte message is the following: * * 1. 4 byte CRC32 of the message - * 2. 1 byte "magic" identifier to allow format changes, value is 2 currently + * 2. 1 byte "magic" identifier to allow format changes, value is 0 currently * 3. 1 byte "attributes" identifier to allow annotations on the message independent of the version (e.g. compression enabled, type of codec used) * 4. 4 byte key length, containing length K * 5. K byte key diff --git a/core/src/main/scala/kafka/message/MessageSet.scala b/core/src/main/scala/kafka/message/MessageSet.scala index a1b5c63b4d9de..f1b8432f4a96f 100644 --- a/core/src/main/scala/kafka/message/MessageSet.scala +++ b/core/src/main/scala/kafka/message/MessageSet.scala @@ -80,17 +80,7 @@ abstract class MessageSet extends Iterable[MessageAndOffset] { * Gives the total size of this message set in bytes */ def sizeInBytes: Int - - /** - * Validate the checksum of all the messages in the set. Throws an InvalidMessageException if the checksum doesn't - * match the payload for any message. - */ - def validate(): Unit = { - for(messageAndOffset <- this) - if(!messageAndOffset.message.isValid) - throw new InvalidMessageException - } - + /** * Print this message set's contents. If the message set has more than 100 messages, just * print the first 100. diff --git a/core/src/main/scala/kafka/metrics/KafkaMetricsGroup.scala b/core/src/main/scala/kafka/metrics/KafkaMetricsGroup.scala index a20ab90165cc7..e9e49180f6de4 100644 --- a/core/src/main/scala/kafka/metrics/KafkaMetricsGroup.scala +++ b/core/src/main/scala/kafka/metrics/KafkaMetricsGroup.scala @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -18,10 +18,15 @@ package kafka.metrics -import com.yammer.metrics.core.{Gauge, MetricName} -import kafka.utils.Logging import java.util.concurrent.TimeUnit + import com.yammer.metrics.Metrics +import com.yammer.metrics.core.{Gauge, MetricName} +import kafka.consumer.{ConsumerTopicStatsRegistry, FetchRequestAndResponseStatsRegistry} +import kafka.producer.{ProducerRequestStatsRegistry, ProducerStatsRegistry, ProducerTopicStatsRegistry} +import kafka.utils.Logging + +import scala.collection.immutable trait KafkaMetricsGroup extends Logging { @@ -30,25 +35,165 @@ trait KafkaMetricsGroup extends Logging { * Creates a new MetricName object for gauges, meters, etc. created for this * metrics group. * @param name Descriptive name of the metric. + * @param tags Additional attributes which mBean will have. * @return Sanitized metric name object. 
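The attributes byte now reserves its low three bits (mask 0x07, previously 0x03) for the codec id; the new lz4 codec is id 3, alongside none (0), gzip (1) and snappy (2). A minimal sketch of extracting the codec from an attributes byte:

    val CompressionCodeMask = 0x07  // low three bits of the attributes byte

    def codecId(attributes: Byte): Int = attributes & CompressionCodeMask

    // none = 0, gzip = 1, snappy = 2, lz4 = 3
    assert(codecId(0x00.toByte) == 0)
    assert(codecId(0x03.toByte) == 3)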
*/ - private def metricName(name: String) = { + private def metricName(name: String, tags: scala.collection.Map[String, String] = Map.empty) = { val klass = this.getClass val pkg = if (klass.getPackage == null) "" else klass.getPackage.getName val simpleName = klass.getSimpleName.replaceAll("\\$$", "") - new MetricName(pkg, simpleName, name) + + explicitMetricName(pkg, simpleName, name, tags) } - def newGauge[T](name: String, metric: Gauge[T]) = - Metrics.defaultRegistry().newGauge(metricName(name), metric) - def newMeter(name: String, eventType: String, timeUnit: TimeUnit) = - Metrics.defaultRegistry().newMeter(metricName(name), eventType, timeUnit) + private def explicitMetricName(group: String, typeName: String, name: String, tags: scala.collection.Map[String, String] = Map.empty) = { + val nameBuilder: StringBuilder = new StringBuilder + + nameBuilder.append(group) + + nameBuilder.append(":type=") + + nameBuilder.append(typeName) + + if (name.length > 0) { + nameBuilder.append(",name=") + nameBuilder.append(name) + } + + KafkaMetricsGroup.toMBeanName(tags).map(mbeanName => nameBuilder.append(",").append(mbeanName)) + + new MetricName(group, typeName, name, null, nameBuilder.toString()) + } + + def newGauge[T](name: String, metric: Gauge[T], tags: scala.collection.Map[String, String] = Map.empty) = + Metrics.defaultRegistry().newGauge(metricName(name, tags), metric) + + def newMeter(name: String, eventType: String, timeUnit: TimeUnit, tags: scala.collection.Map[String, String] = Map.empty) = + Metrics.defaultRegistry().newMeter(metricName(name, tags), eventType, timeUnit) + + def newHistogram(name: String, biased: Boolean = true, tags: scala.collection.Map[String, String] = Map.empty) = + Metrics.defaultRegistry().newHistogram(metricName(name, tags), biased) + + def newTimer(name: String, durationUnit: TimeUnit, rateUnit: TimeUnit, tags: scala.collection.Map[String, String] = Map.empty) = + Metrics.defaultRegistry().newTimer(metricName(name, tags), durationUnit, rateUnit) + + def removeMetric(name: String, tags: scala.collection.Map[String, String] = Map.empty) = + Metrics.defaultRegistry().removeMetric(metricName(name, tags)) + + +} + +object KafkaMetricsGroup extends KafkaMetricsGroup with Logging { + /** + * To make sure all the metrics be de-registered after consumer/producer close, the metric names should be + * put into the metric name set. 
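metricName now threads an optional tag map into the MetricName, and the MBean name is assembled as group:type=Type[,name=Name][,tag=value,...] with empty-valued tags dropped. A hypothetical helper mirroring explicitMetricName, with an illustrative result for the tagged gauge RequestChannel registers later in this patch:

    def mbeanName(group: String, typeName: String, name: String, tags: Map[String, String]): String = {
      val base = group + ":type=" + typeName + (if (name.length > 0) ",name=" + name else "")
      val tagPart = tags.filter { case (_, v) => v != "" }
                        .map { case (k, v) => "%s=%s".format(k, v) }
                        .mkString(",")
      if (tagPart.isEmpty) base else base + "," + tagPart
    }

    // the per-processor response queue gauge would surface as something like:
    //   kafka.network:type=RequestChannel,name=ResponseQueueSize,processor=0
    mbeanName("kafka.network", "RequestChannel", "ResponseQueueSize", Map("processor" -> "0"))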
+ */ + private val consumerMetricNameList: immutable.List[MetricName] = immutable.List[MetricName]( + // kafka.consumer.ZookeeperConsumerConnector + new MetricName("kafka.consumer", "ZookeeperConsumerConnector", "FetchQueueSize"), + new MetricName("kafka.consumer", "ZookeeperConsumerConnector", "KafkaCommitsPerSec"), + new MetricName("kafka.consumer", "ZookeeperConsumerConnector", "ZooKeeperCommitsPerSec"), + new MetricName("kafka.consumer", "ZookeeperConsumerConnector", "RebalanceRateAndTime"), + new MetricName("kafka.consumer", "ZookeeperConsumerConnector", "OwnedPartitionsCount"), + + // kafka.consumer.ConsumerFetcherManager + new MetricName("kafka.consumer", "ConsumerFetcherManager", "MaxLag"), + new MetricName("kafka.consumer", "ConsumerFetcherManager", "MinFetchRate"), + + // kafka.server.AbstractFetcherThread <-- kafka.consumer.ConsumerFetcherThread + new MetricName("kafka.server", "FetcherLagMetrics", "ConsumerLag"), + + // kafka.consumer.ConsumerTopicStats <-- kafka.consumer.{ConsumerIterator, PartitionTopicInfo} + new MetricName("kafka.consumer", "ConsumerTopicMetrics", "MessagesPerSec"), + + // kafka.consumer.ConsumerTopicStats + new MetricName("kafka.consumer", "ConsumerTopicMetrics", "BytesPerSec"), + + // kafka.server.AbstractFetcherThread <-- kafka.consumer.ConsumerFetcherThread + new MetricName("kafka.server", "FetcherStats", "BytesPerSec"), + new MetricName("kafka.server", "FetcherStats", "RequestsPerSec"), + + // kafka.consumer.FetchRequestAndResponseStats <-- kafka.consumer.SimpleConsumer + new MetricName("kafka.consumer", "FetchRequestAndResponseMetrics", "FetchResponseSize"), + new MetricName("kafka.consumer", "FetchRequestAndResponseMetrics", "FetchRequestRateAndTimeMs"), + + /** + * ProducerRequestStats <-- SyncProducer + * metric for SyncProducer in fetchTopicMetaData() needs to be removed when consumer is closed. 
+ */ + new MetricName("kafka.producer", "ProducerRequestMetrics", "ProducerRequestRateAndTimeMs"), + new MetricName("kafka.producer", "ProducerRequestMetrics", "ProducerRequestSize") + ) - def newHistogram(name: String, biased: Boolean = true) = - Metrics.defaultRegistry().newHistogram(metricName(name), biased) + private val producerMetricNameList: immutable.List[MetricName] = immutable.List[MetricName]( + // kafka.producer.ProducerStats <-- DefaultEventHandler <-- Producer + new MetricName("kafka.producer", "ProducerStats", "SerializationErrorsPerSec"), + new MetricName("kafka.producer", "ProducerStats", "ResendsPerSec"), + new MetricName("kafka.producer", "ProducerStats", "FailedSendsPerSec"), - def newTimer(name: String, durationUnit: TimeUnit, rateUnit: TimeUnit) = - Metrics.defaultRegistry().newTimer(metricName(name), durationUnit, rateUnit) + // kafka.producer.ProducerSendThread + new MetricName("kafka.producer.async", "ProducerSendThread", "ProducerQueueSize"), + // kafka.producer.ProducerTopicStats <-- kafka.producer.{Producer, async.DefaultEventHandler} + new MetricName("kafka.producer", "ProducerTopicMetrics", "MessagesPerSec"), + new MetricName("kafka.producer", "ProducerTopicMetrics", "DroppedMessagesPerSec"), + new MetricName("kafka.producer", "ProducerTopicMetrics", "BytesPerSec"), + + // kafka.producer.ProducerRequestStats <-- SyncProducer + new MetricName("kafka.producer", "ProducerRequestMetrics", "ProducerRequestRateAndTimeMs"), + new MetricName("kafka.producer", "ProducerRequestMetrics", "ProducerRequestSize") + ) + + private def toMBeanName(tags: collection.Map[String, String]): Option[String] = { + val filteredTags = tags + .filter { case (tagKey, tagValue) => tagValue != ""} + if (filteredTags.nonEmpty) { + val tagsString = filteredTags + .map { case (key, value) => "%s=%s".format(key, value)} + .mkString(",") + + Some(tagsString) + } + else { + None + } + } + + def removeAllConsumerMetrics(clientId: String) { + FetchRequestAndResponseStatsRegistry.removeConsumerFetchRequestAndResponseStats(clientId) + ConsumerTopicStatsRegistry.removeConsumerTopicStat(clientId) + ProducerRequestStatsRegistry.removeProducerRequestStats(clientId) + removeAllMetricsInList(KafkaMetricsGroup.consumerMetricNameList, clientId) + } + + def removeAllProducerMetrics(clientId: String) { + ProducerRequestStatsRegistry.removeProducerRequestStats(clientId) + ProducerTopicStatsRegistry.removeProducerTopicStats(clientId) + ProducerStatsRegistry.removeProducerStats(clientId) + removeAllMetricsInList(KafkaMetricsGroup.producerMetricNameList, clientId) + } + + private def removeAllMetricsInList(metricNameList: immutable.List[MetricName], clientId: String) { + metricNameList.foreach(metric => { + val pattern = (".*clientId=" + clientId + ".*").r + val registeredMetrics = scala.collection.JavaConversions.asScalaSet(Metrics.defaultRegistry().allMetrics().keySet()) + for (registeredMetric <- registeredMetrics) { + if (registeredMetric.getGroup == metric.getGroup && + registeredMetric.getName == metric.getName && + registeredMetric.getType == metric.getType) { + pattern.findFirstIn(registeredMetric.getMBeanName) match { + case Some(_) => { + val beforeRemovalSize = Metrics.defaultRegistry().allMetrics().keySet().size + Metrics.defaultRegistry().removeMetric(registeredMetric) + val afterRemovalSize = Metrics.defaultRegistry().allMetrics().keySet().size + trace("Removing metric %s. 
Metrics registry size reduced from %d to %d".format( + registeredMetric, beforeRemovalSize, afterRemovalSize)) + } + case _ => + } + } + } + }) + } } diff --git a/core/src/main/scala/kafka/network/BlockingChannel.scala b/core/src/main/scala/kafka/network/BlockingChannel.scala index ab04b3fe0dc67..6e2a38eee8e56 100644 --- a/core/src/main/scala/kafka/network/BlockingChannel.scala +++ b/core/src/main/scala/kafka/network/BlockingChannel.scala @@ -42,48 +42,59 @@ class BlockingChannel( val host: String, private var readChannel: ReadableByteChannel = null private var writeChannel: GatheringByteChannel = null private val lock = new Object() - + private val connectTimeoutMs = readTimeoutMs + def connect() = lock synchronized { if(!connected) { - channel = SocketChannel.open() - if(readBufferSize > 0) - channel.socket.setReceiveBufferSize(readBufferSize) - if(writeBufferSize > 0) - channel.socket.setSendBufferSize(writeBufferSize) - channel.configureBlocking(true) - channel.socket.setSoTimeout(readTimeoutMs) - channel.socket.setKeepAlive(true) - channel.socket.setTcpNoDelay(true) - channel.connect(new InetSocketAddress(host, port)) + try { + channel = SocketChannel.open() + if(readBufferSize > 0) + channel.socket.setReceiveBufferSize(readBufferSize) + if(writeBufferSize > 0) + channel.socket.setSendBufferSize(writeBufferSize) + channel.configureBlocking(true) + channel.socket.setSoTimeout(readTimeoutMs) + channel.socket.setKeepAlive(true) + channel.socket.setTcpNoDelay(true) + channel.socket.connect(new InetSocketAddress(host, port), connectTimeoutMs) - writeChannel = channel - readChannel = Channels.newChannel(channel.socket().getInputStream) - connected = true - // settings may not match what we requested above - val msg = "Created socket with SO_TIMEOUT = %d (requested %d), SO_RCVBUF = %d (requested %d), SO_SNDBUF = %d (requested %d)." - debug(msg.format(channel.socket.getSoTimeout, - readTimeoutMs, - channel.socket.getReceiveBufferSize, - readBufferSize, - channel.socket.getSendBufferSize, - writeBufferSize)) + writeChannel = channel + readChannel = Channels.newChannel(channel.socket().getInputStream) + connected = true + // settings may not match what we requested above + val msg = "Created socket with SO_TIMEOUT = %d (requested %d), SO_RCVBUF = %d (requested %d), SO_SNDBUF = %d (requested %d), connectTimeoutMs = %d." + debug(msg.format(channel.socket.getSoTimeout, + readTimeoutMs, + channel.socket.getReceiveBufferSize, + readBufferSize, + channel.socket.getSendBufferSize, + writeBufferSize, + connectTimeoutMs)) + + } catch { + case e: Throwable => disconnect() + } } } def disconnect() = lock synchronized { - if(connected || channel != null) { - // closing the main socket channel *should* close the read channel - // but let's do it to be sure. + if(channel != null) { swallow(channel.close()) swallow(channel.socket.close()) - if(readChannel != null) swallow(readChannel.close()) - channel = null; readChannel = null; writeChannel = null - connected = false + channel = null + writeChannel = null + } + // closing the main socket channel *should* close the read channel + // but let's do it to be sure. 
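The connect path above now bounds connection establishment with channel.socket.connect(address, timeout), reusing readTimeoutMs as the connect timeout, and falls back to disconnect() on any failure. The essential connect-with-timeout pattern as a standalone sketch:

    import java.net.InetSocketAddress
    import java.nio.channels.SocketChannel

    def openBlocking(host: String, port: Int, connectTimeoutMs: Int): SocketChannel = {
      val channel = SocketChannel.open()
      channel.configureBlocking(true)
      // SocketChannel.connect() itself has no timeout; connecting through the
      // underlying socket lets the caller bound how long the attempt may take
      channel.socket.connect(new InetSocketAddress(host, port), connectTimeoutMs)
      channel
    }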
+ if(readChannel != null) { + swallow(readChannel.close()) + readChannel = null } + connected = false } def isConnected = connected - + def send(request: RequestOrResponse):Int = { if(!connected) throw new ClosedChannelException() @@ -102,4 +113,4 @@ class BlockingChannel( val host: String, response } -} \ No newline at end of file +} diff --git a/core/src/main/scala/kafka/network/BoundedByteBufferSend.scala b/core/src/main/scala/kafka/network/BoundedByteBufferSend.scala index a624359fb2059..55ecac285e00a 100644 --- a/core/src/main/scala/kafka/network/BoundedByteBufferSend.scala +++ b/core/src/main/scala/kafka/network/BoundedByteBufferSend.scala @@ -25,7 +25,7 @@ import kafka.api.RequestOrResponse @nonthreadsafe private[kafka] class BoundedByteBufferSend(val buffer: ByteBuffer) extends Send { - private var sizeBuffer = ByteBuffer.allocate(4) + private val sizeBuffer = ByteBuffer.allocate(4) // Avoid possibility of overflow for 2GB-4 byte buffer if(buffer.remaining > Int.MaxValue - sizeBuffer.limit) @@ -53,7 +53,7 @@ private[kafka] class BoundedByteBufferSend(val buffer: ByteBuffer) extends Send def writeTo(channel: GatheringByteChannel): Int = { expectIncomplete() - var written = channel.write(Array(sizeBuffer, buffer)) + val written = channel.write(Array(sizeBuffer, buffer)) // if we are done, mark it off if(!buffer.hasRemaining) complete = true diff --git a/core/src/main/scala/kafka/network/RequestChannel.scala b/core/src/main/scala/kafka/network/RequestChannel.scala index a6ec970d72fc8..7b1db3dbbb2c0 100644 --- a/core/src/main/scala/kafka/network/RequestChannel.scala +++ b/core/src/main/scala/kafka/network/RequestChannel.scala @@ -121,12 +121,16 @@ class RequestChannel(val numProcessors: Int, val queueSize: Int) extends KafkaMe } ) - for(i <- 0 until numProcessors) { - newGauge( - "Processor-" + i + "-ResponseQueueSize", + newGauge("ResponseQueueSize", new Gauge[Int]{ + def value = responseQueues.foldLeft(0) {(total, q) => total + q.size()} + }) + + for (i <- 0 until numProcessors) { + newGauge("ResponseQueueSize", new Gauge[Int] { def value = responseQueues(i).size() - } + }, + Map("processor" -> i.toString) ) } @@ -156,6 +160,10 @@ class RequestChannel(val numProcessors: Int, val queueSize: Int) extends KafkaMe onResponse(processor) } + /** Get the next request or block until specified time has elapsed */ + def receiveRequest(timeout: Long): RequestChannel.Request = + requestQueue.poll(timeout, TimeUnit.MILLISECONDS) + /** Get the next request or block until there is one */ def receiveRequest(): RequestChannel.Request = requestQueue.take() @@ -179,24 +187,25 @@ class RequestChannel(val numProcessors: Int, val queueSize: Int) extends KafkaMe object RequestMetrics { val metricsMap = new scala.collection.mutable.HashMap[String, RequestMetrics] - val consumerFetchMetricName = RequestKeys.nameForKey(RequestKeys.FetchKey) + "-Consumer" - val followFetchMetricName = RequestKeys.nameForKey(RequestKeys.FetchKey) + "-Follower" + val consumerFetchMetricName = RequestKeys.nameForKey(RequestKeys.FetchKey) + "Consumer" + val followFetchMetricName = RequestKeys.nameForKey(RequestKeys.FetchKey) + "Follower" (RequestKeys.keyToNameAndDeserializerMap.values.map(e => e._1) ++ List(consumerFetchMetricName, followFetchMetricName)).foreach(name => metricsMap.put(name, new RequestMetrics(name))) } class RequestMetrics(name: String) extends KafkaMetricsGroup { - val requestRate = newMeter(name + "-RequestsPerSec", "requests", TimeUnit.SECONDS) + val tags = Map("request" -> name) + val requestRate = 
newMeter("RequestsPerSec", "requests", TimeUnit.SECONDS, tags) // time a request spent in a request queue - val requestQueueTimeHist = newHistogram(name + "-RequestQueueTimeMs") + val requestQueueTimeHist = newHistogram("RequestQueueTimeMs", biased = true, tags) // time a request takes to be processed at the local broker - val localTimeHist = newHistogram(name + "-LocalTimeMs") + val localTimeHist = newHistogram("LocalTimeMs", biased = true, tags) // time a request takes to wait on remote brokers (only relevant to fetch and produce requests) - val remoteTimeHist = newHistogram(name + "-RemoteTimeMs") + val remoteTimeHist = newHistogram("RemoteTimeMs", biased = true, tags) // time a response spent in a response queue - val responseQueueTimeHist = newHistogram(name + "-ResponseQueueTimeMs") + val responseQueueTimeHist = newHistogram("ResponseQueueTimeMs", biased = true, tags) // time to send the response to the requester - val responseSendTimeHist = newHistogram(name + "-ResponseSendTimeMs") - val totalTimeHist = newHistogram(name + "-TotalTimeMs") + val responseSendTimeHist = newHistogram("ResponseSendTimeMs", biased = true, tags) + val totalTimeHist = newHistogram("TotalTimeMs", biased = true, tags) } diff --git a/core/src/main/scala/kafka/network/SocketServer.scala b/core/src/main/scala/kafka/network/SocketServer.scala index 216245deb119c..e451592fe3581 100644 --- a/core/src/main/scala/kafka/network/SocketServer.scala +++ b/core/src/main/scala/kafka/network/SocketServer.scala @@ -17,14 +17,19 @@ package kafka.network +import java.util import java.util.concurrent._ import java.util.concurrent.atomic._ import java.net._ import java.io._ import java.nio.channels._ +import scala.collection._ + import kafka.common.KafkaException +import kafka.metrics.KafkaMetricsGroup import kafka.utils._ +import com.yammer.metrics.core.{Gauge, Meter} /** * An NIO socket server. 
The threading model is @@ -39,27 +44,47 @@ class SocketServer(val brokerId: Int, val maxQueuedRequests: Int, val sendBufferSize: Int, val recvBufferSize: Int, - val maxRequestSize: Int = Int.MaxValue) extends Logging { + val maxRequestSize: Int = Int.MaxValue, + val maxConnectionsPerIp: Int = Int.MaxValue, + val connectionsMaxIdleMs: Long, + val maxConnectionsPerIpOverrides: Map[String, Int] = Map[String, Int]()) extends Logging with KafkaMetricsGroup { this.logIdent = "[Socket Server on Broker " + brokerId + "], " private val time = SystemTime private val processors = new Array[Processor](numProcessorThreads) @volatile private var acceptor: Acceptor = null val requestChannel = new RequestChannel(numProcessorThreads, maxQueuedRequests) + /* a meter to track the average free capacity of the network processors */ + private val aggregateIdleMeter = newMeter("NetworkProcessorAvgIdlePercent", "percent", TimeUnit.NANOSECONDS) + /** * Start the socket server */ def startup() { + val quotas = new ConnectionQuotas(maxConnectionsPerIp, maxConnectionsPerIpOverrides) for(i <- 0 until numProcessorThreads) { - processors(i) = new Processor(i, time, maxRequestSize, requestChannel) - Utils.newThread("kafka-processor-%d-%d".format(port, i), processors(i), false).start() + processors(i) = new Processor(i, + time, + maxRequestSize, + aggregateIdleMeter, + newMeter("IdlePercent", "percent", TimeUnit.NANOSECONDS, Map("networkProcessor" -> i.toString)), + numProcessorThreads, + requestChannel, + quotas, + connectionsMaxIdleMs) + Utils.newThread("kafka-network-thread-%d-%d".format(port, i), processors(i), false).start() } + + newGauge("ResponsesBeingSent", new Gauge[Int] { + def value = processors.foldLeft(0) { (total, p) => total + p.countInterestOps(SelectionKey.OP_WRITE) } + }) + // register the processor threads for notification of responses requestChannel.addResponseListener((id:Int) => processors(id).wakeup()) // start accepting connections - this.acceptor = new Acceptor(host, port, processors, sendBufferSize, recvBufferSize) - Utils.newThread("kafka-acceptor", acceptor, false).start() + this.acceptor = new Acceptor(host, port, processors, sendBufferSize, recvBufferSize, quotas) + Utils.newThread("kafka-socket-acceptor", acceptor, false).start() acceptor.awaitStartup info("Started") } @@ -80,12 +105,12 @@ class SocketServer(val brokerId: Int, /** * A base class with some helper variables and methods */ -private[kafka] abstract class AbstractServerThread extends Runnable with Logging { +private[kafka] abstract class AbstractServerThread(connectionQuotas: ConnectionQuotas) extends Runnable with Logging { protected val selector = Selector.open(); private val startupLatch = new CountDownLatch(1) private val shutdownLatch = new CountDownLatch(1) - private val alive = new AtomicBoolean(false) + private val alive = new AtomicBoolean(true) /** * Initiates a graceful shutdown by signaling to stop and waiting for the shutdown to complete @@ -105,7 +130,6 @@ private[kafka] abstract class AbstractServerThread extends Runnable with Logging * Record that the thread startup is complete */ protected def startupComplete() = { - alive.set(true) startupLatch.countDown } @@ -124,13 +148,60 @@ private[kafka] abstract class AbstractServerThread extends Runnable with Logging */ def wakeup() = selector.wakeup() + /** + * Close the given key and associated socket + */ + def close(key: SelectionKey) { + if(key != null) { + key.attach(null) + close(key.channel.asInstanceOf[SocketChannel]) + swallowError(key.cancel()) + } + } + + def 
close(channel: SocketChannel) { + if(channel != null) { + debug("Closing connection from " + channel.socket.getRemoteSocketAddress()) + connectionQuotas.dec(channel.socket.getInetAddress) + swallowError(channel.socket().close()) + swallowError(channel.close()) + } + } + + /** + * Close all open connections + */ + def closeAll() { + // removes cancelled keys from selector.keys set + this.selector.selectNow() + val iter = this.selector.keys().iterator() + while (iter.hasNext) { + val key = iter.next() + close(key) + } + } + + def countInterestOps(ops: Int): Int = { + var count = 0 + val it = this.selector.keys().iterator() + while (it.hasNext) { + if ((it.next().interestOps() & ops) != 0) { + count += 1 + } + } + count + } } /** * Thread that accepts and configures new connections. There is only need for one of these */ -private[kafka] class Acceptor(val host: String, val port: Int, private val processors: Array[Processor], - val sendBufferSize: Int, val recvBufferSize: Int) extends AbstractServerThread { +private[kafka] class Acceptor(val host: String, + val port: Int, + private val processors: Array[Processor], + val sendBufferSize: Int, + val recvBufferSize: Int, + connectionQuotas: ConnectionQuotas) extends AbstractServerThread(connectionQuotas) { val serverChannel = openServerSocket(host, port) /** @@ -151,14 +222,14 @@ private[kafka] class Acceptor(val host: String, val port: Int, private val proce key = iter.next iter.remove() if(key.isAcceptable) - accept(key, processors(currentProcessor)) - else - throw new IllegalStateException("Unrecognized key state for acceptor thread.") + accept(key, processors(currentProcessor)) + else + throw new IllegalStateException("Unrecognized key state for acceptor thread.") - // round robin to the next processor thread - currentProcessor = (currentProcessor + 1) % processors.length + // round robin to the next processor thread + currentProcessor = (currentProcessor + 1) % processors.length } catch { - case e: Throwable => error("Error in acceptor", e) + case e: Throwable => error("Error while accepting connection", e) } } } @@ -180,6 +251,7 @@ private[kafka] class Acceptor(val host: String, val port: Int, private val proce new InetSocketAddress(host, port) val serverChannel = ServerSocketChannel.open() serverChannel.configureBlocking(false) + serverChannel.socket().setReceiveBufferSize(recvBufferSize) try { serverChannel.socket.bind(socketAddress) info("Awaiting socket connections on %s:%d.".format(socketAddress.getHostName, port)) @@ -195,19 +267,24 @@ private[kafka] class Acceptor(val host: String, val port: Int, private val proce */ def accept(key: SelectionKey, processor: Processor) { val serverSocketChannel = key.channel().asInstanceOf[ServerSocketChannel] - serverSocketChannel.socket().setReceiveBufferSize(recvBufferSize) - val socketChannel = serverSocketChannel.accept() - socketChannel.configureBlocking(false) - socketChannel.socket().setTcpNoDelay(true) - socketChannel.socket().setSendBufferSize(sendBufferSize) + try { + connectionQuotas.inc(socketChannel.socket().getInetAddress) + socketChannel.configureBlocking(false) + socketChannel.socket().setTcpNoDelay(true) + socketChannel.socket().setSendBufferSize(sendBufferSize) - debug("Accepted connection from %s on %s. sendBufferSize [actual|requested]: [%d|%d] recvBufferSize [actual|requested]: [%d|%d]" - .format(socketChannel.socket.getInetAddress, socketChannel.socket.getLocalSocketAddress, + debug("Accepted connection from %s on %s. 
sendBufferSize [actual|requested]: [%d|%d] recvBufferSize [actual|requested]: [%d|%d]" + .format(socketChannel.socket.getInetAddress, socketChannel.socket.getLocalSocketAddress, socketChannel.socket.getSendBufferSize, sendBufferSize, socketChannel.socket.getReceiveBufferSize, recvBufferSize)) - processor.accept(socketChannel) + processor.accept(socketChannel) + } catch { + case e: TooManyConnectionsException => + info("Rejected connection from %s, address already has the configured maximum of %d connections.".format(e.ip, e.count)) + close(socketChannel) + } } } @@ -219,9 +296,18 @@ private[kafka] class Acceptor(val host: String, val port: Int, private val proce private[kafka] class Processor(val id: Int, val time: Time, val maxRequestSize: Int, - val requestChannel: RequestChannel) extends AbstractServerThread { - - private val newConnections = new ConcurrentLinkedQueue[SocketChannel](); + val aggregateIdleMeter: Meter, + val idleMeter: Meter, + val totalProcessorThreads: Int, + val requestChannel: RequestChannel, + connectionQuotas: ConnectionQuotas, + val connectionsMaxIdleMs: Long) extends AbstractServerThread(connectionQuotas) { + + private val newConnections = new ConcurrentLinkedQueue[SocketChannel]() + private val connectionsMaxIdleNanos = connectionsMaxIdleMs * 1000 * 1000 + private var currentTimeNanos = SystemTime.nanoseconds + private val lruConnections = new util.LinkedHashMap[SelectionKey, Long] + private var nextIdleCloseCheckTime = currentTimeNanos + connectionsMaxIdleNanos override def run() { startupComplete() @@ -230,9 +316,18 @@ private[kafka] class Processor(val id: Int, configureNewConnections() // register any new responses for writing processNewResponses() - val startSelectTime = SystemTime.milliseconds + val startSelectTime = SystemTime.nanoseconds val ready = selector.select(300) - trace("Processor id " + id + " selection time = " + (SystemTime.milliseconds - startSelectTime) + " ms") + currentTimeNanos = SystemTime.nanoseconds + val idleTime = currentTimeNanos - startSelectTime + idleMeter.mark(idleTime) + // We use a single meter for aggregate idle percentage for the thread pool. + // Since meter is calculated as total_recorded_value / time_window and + // time_window is independent of the number of threads, each recorded idle + // time should be discounted by # threads. 
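The comment above is the key to the aggregate idle meter: the meter's rate is total recorded value divided by the time window, and the window does not grow with the thread count, so each processor's idle time must be divided by the number of network threads before being recorded. A standalone sketch of that arithmetic follows, assuming only the stated meter semantics; the object and function names are illustrative, not part of the patch.

```scala
// Sketch: why per-thread idle time is discounted by the thread count before
// being recorded on the shared aggregate meter. Without the division, N fully
// idle threads would report N * 100% idle over the same window.
object IdlePercentExample {
  def aggregateIdleFraction(idleNanosPerThread: Seq[Long], windowNanos: Long): Double = {
    val threads = idleNanosPerThread.size
    // each thread's idle time is discounted by the number of threads
    val discounted = idleNanosPerThread.map(_.toDouble / threads).sum
    discounted / windowNanos
  }

  def main(args: Array[String]): Unit = {
    // 3 threads, each idle 600 ms of a 1 s window => 0.6 (60% aggregate idle)
    println(aggregateIdleFraction(Seq(600000000L, 600000000L, 600000000L), 1000000000L))
  }
}
```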
+ aggregateIdleMeter.mark(idleTime / totalProcessorThreads) + + trace("Processor id " + id + " selection time = " + idleTime + " ns") if(ready > 0) { val keys = selector.selectedKeys() val iter = keys.iterator() @@ -263,12 +358,22 @@ private[kafka] class Processor(val id: Int, } } } + maybeCloseOldestConnection } debug("Closing selector.") + closeAll() swallowError(selector.close()) shutdownComplete() } + /** + * Close the given key and associated socket + */ + override def close(key: SelectionKey): Unit = { + lruConnections.remove(key) + super.close(key) + } + private def processNewResponses() { var curr = requestChannel.receiveResponse(id) while(curr != null) { @@ -305,15 +410,6 @@ private[kafka] class Processor(val id: Int, } } } - - private def close(key: SelectionKey) { - val channel = key.channel.asInstanceOf[SocketChannel] - debug("Closing connection from " + channel.socket.getRemoteSocketAddress()) - swallowError(channel.socket().close()) - swallowError(channel.close()) - key.attach(null) - swallowError(key.cancel()) - } /** * Queue up a new connection for reading @@ -338,6 +434,7 @@ private[kafka] class Processor(val id: Int, * Process reads from ready sockets */ def read(key: SelectionKey) { + lruConnections.put(key, currentTimeNanos) val socketChannel = channelFor(key) var receive = key.attachment.asInstanceOf[Receive] if(key.attachment == null) { @@ -388,4 +485,50 @@ private[kafka] class Processor(val id: Int, private def channelFor(key: SelectionKey) = key.channel().asInstanceOf[SocketChannel] + private def maybeCloseOldestConnection { + if(currentTimeNanos > nextIdleCloseCheckTime) { + if(lruConnections.isEmpty) { + nextIdleCloseCheckTime = currentTimeNanos + connectionsMaxIdleNanos + } else { + val oldestConnectionEntry = lruConnections.entrySet.iterator().next() + val connectionLastActiveTime = oldestConnectionEntry.getValue + nextIdleCloseCheckTime = connectionLastActiveTime + connectionsMaxIdleNanos + if(currentTimeNanos > nextIdleCloseCheckTime) { + val key: SelectionKey = oldestConnectionEntry.getKey + trace("About to close the idle connection from " + key.channel.asInstanceOf[SocketChannel].socket.getRemoteSocketAddress + + " due to being idle for " + (currentTimeNanos - connectionLastActiveTime) / 1000 / 1000 + " millis") + close(key) + } + } + } + } + } + +class ConnectionQuotas(val defaultMax: Int, overrideQuotas: Map[String, Int]) { + private val overrides = overrideQuotas.map(entry => (InetAddress.getByName(entry._1), entry._2)) + private val counts = mutable.Map[InetAddress, Int]() + + def inc(addr: InetAddress) { + counts synchronized { + val count = counts.getOrElse(addr, 0) + counts.put(addr, count + 1) + val max = overrides.getOrElse(addr, defaultMax) + if(count >= max) + throw new TooManyConnectionsException(addr, max) + } + } + + def dec(addr: InetAddress) { + counts synchronized { + val count = counts.get(addr).get + if(count == 1) + counts.remove(addr) + else + counts.put(addr, count - 1) + } + } + +} + +class TooManyConnectionsException(val ip: InetAddress, val count: Int) extends KafkaException("Too many connections from %s (maximum = %d)".format(ip, count)) diff --git a/core/src/main/scala/kafka/producer/BaseProducer.scala b/core/src/main/scala/kafka/producer/BaseProducer.scala new file mode 100644 index 0000000000000..8e007130b2825 --- /dev/null +++ b/core/src/main/scala/kafka/producer/BaseProducer.scala @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
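The ConnectionQuotas class introduced above gives the acceptor a per-IP connection count with optional per-address overrides. A small usage sketch follows, assuming kafka.network.ConnectionQuotas and TooManyConnectionsException are visible on the classpath; the addresses and limits are made up for illustration.

```scala
import java.net.InetAddress
import kafka.network.{ConnectionQuotas, TooManyConnectionsException}

object ConnectionQuotaExample {
  def main(args: Array[String]): Unit = {
    // allow at most 2 connections per IP, with a higher override for loopback
    val quotas = new ConnectionQuotas(2, Map("127.0.0.1" -> 10))
    val addr = InetAddress.getByName("10.0.0.5")

    quotas.inc(addr)   // first connection, accepted
    quotas.inc(addr)   // second connection, accepted
    try quotas.inc(addr)   // third connection from the same IP
    catch {
      case e: TooManyConnectionsException => println("rejected: " + e.getMessage)
    }
    quotas.dec(addr)   // a connection closed, count drops again
  }
}
```

Note that inc() records the connection before throwing, which matches the acceptor code above: on TooManyConnectionsException the acceptor closes the socket, and close() calls dec() so the count stays consistent.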
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.producer + +import java.util.Properties + +// A base producer used whenever we need to have options for both old and new producers; +// this class will be removed once we fully rolled out 0.9 +trait BaseProducer { + def send(topic: String, key: Array[Byte], value: Array[Byte]) + def close() +} + +class NewShinyProducer(producerProps: Properties) extends BaseProducer { + import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} + import org.apache.kafka.clients.producer.internals.ErrorLoggingCallback + + // decide whether to send synchronously based on producer properties + val sync = producerProps.getProperty("producer.type", "async").equals("sync") + + val producer = new KafkaProducer[Array[Byte],Array[Byte]](producerProps) + + override def send(topic: String, key: Array[Byte], value: Array[Byte]) { + val record = new ProducerRecord[Array[Byte],Array[Byte]](topic, key, value) + if(sync) { + this.producer.send(record).get() + } else { + this.producer.send(record, + new ErrorLoggingCallback(topic, key, value, false)) + } + } + + override def close() { + this.producer.close() + } +} + +class OldProducer(producerProps: Properties) extends BaseProducer { + import kafka.producer.{KeyedMessage, ProducerConfig} + + // default to byte array partitioner + if (producerProps.getProperty("partitioner.class") == null) + producerProps.setProperty("partitioner.class", classOf[kafka.producer.ByteArrayPartitioner].getName) + val producer = new kafka.producer.Producer[Array[Byte], Array[Byte]](new ProducerConfig(producerProps)) + + override def send(topic: String, key: Array[Byte], value: Array[Byte]) { + this.producer.send(new KeyedMessage[Array[Byte], Array[Byte]](topic, key, value)) + } + + override def close() { + this.producer.close() + } +} + diff --git a/core/src/main/scala/kafka/producer/ByteArrayPartitioner.scala b/core/src/main/scala/kafka/producer/ByteArrayPartitioner.scala index 988e4374d8c9b..6a3b02e414eb7 100644 --- a/core/src/main/scala/kafka/producer/ByteArrayPartitioner.scala +++ b/core/src/main/scala/kafka/producer/ByteArrayPartitioner.scala @@ -20,7 +20,7 @@ package kafka.producer import kafka.utils._ -private class ByteArrayPartitioner(props: VerifiableProperties = null) extends Partitioner { +class ByteArrayPartitioner(props: VerifiableProperties = null) extends Partitioner { def partition(key: Any, numPartitions: Int): Int = { Utils.abs(java.util.Arrays.hashCode(key.asInstanceOf[Array[Byte]])) % numPartitions } diff --git a/core/src/main/scala/kafka/producer/ConsoleProducer.scala b/core/src/main/scala/kafka/producer/ConsoleProducer.scala deleted file mode 100644 index 28de57304db6a..0000000000000 --- a/core/src/main/scala/kafka/producer/ConsoleProducer.scala +++ /dev/null @@ -1,238 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) 
under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package kafka.producer - -import scala.collection.JavaConversions._ -import joptsimple._ -import java.util.Properties -import java.io._ -import kafka.common._ -import kafka.message._ -import kafka.serializer._ - -object ConsoleProducer { - - def main(args: Array[String]) { - val parser = new OptionParser - val topicOpt = parser.accepts("topic", "REQUIRED: The topic id to produce messages to.") - .withRequiredArg - .describedAs("topic") - .ofType(classOf[String]) - val brokerListOpt = parser.accepts("broker-list", "REQUIRED: The broker list string in the form HOST1:PORT1,HOST2:PORT2.") - .withRequiredArg - .describedAs("broker-list") - .ofType(classOf[String]) - val syncOpt = parser.accepts("sync", "If set message send requests to the brokers are synchronously, one at a time as they arrive.") - val compressOpt = parser.accepts("compress", "If set, messages batches are sent compressed") - val batchSizeOpt = parser.accepts("batch-size", "Number of messages to send in a single batch if they are not being sent synchronously.") - .withRequiredArg - .describedAs("size") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(200) - val messageSendMaxRetriesOpt = parser.accepts("message-send-max-retries", "Brokers can fail receiving the message for multiple reasons, and being unavailable transiently is just one of them. This property specifies the number of retires before the producer give up and drop this message.") - .withRequiredArg - .ofType(classOf[java.lang.Integer]) - .defaultsTo(3) - val retryBackoffMsOpt = parser.accepts("retry-backoff-ms", "Before each retry, the producer refreshes the metadata of relevant topics. Since leader election takes a bit of time, this property specifies the amount of time that the producer waits before refreshing the metadata.") - .withRequiredArg - .ofType(classOf[java.lang.Long]) - .defaultsTo(100) - val sendTimeoutOpt = parser.accepts("timeout", "If set and the producer is running in asynchronous mode, this gives the maximum amount of time" + - " a message will queue awaiting suffient batch size. 
The value is given in ms.") - .withRequiredArg - .describedAs("timeout_ms") - .ofType(classOf[java.lang.Long]) - .defaultsTo(1000) - val queueSizeOpt = parser.accepts("queue-size", "If set and the producer is running in asynchronous mode, this gives the maximum amount of " + - " messages will queue awaiting suffient batch size.") - .withRequiredArg - .describedAs("queue_size") - .ofType(classOf[java.lang.Long]) - .defaultsTo(10000) - val queueEnqueueTimeoutMsOpt = parser.accepts("queue-enqueuetimeout-ms", "Timeout for event enqueue") - .withRequiredArg - .describedAs("queue enqueuetimeout ms") - .ofType(classOf[java.lang.Long]) - .defaultsTo(Int.MaxValue) - val requestRequiredAcksOpt = parser.accepts("request-required-acks", "The required acks of the producer requests") - .withRequiredArg - .describedAs("request required acks") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(0) - val requestTimeoutMsOpt = parser.accepts("request-timeout-ms", "The ack timeout of the producer requests. Value must be non-negative and non-zero") - .withRequiredArg - .describedAs("request timeout ms") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(1500) - val valueEncoderOpt = parser.accepts("value-serializer", "The class name of the message encoder implementation to use for serializing values.") - .withRequiredArg - .describedAs("encoder_class") - .ofType(classOf[java.lang.String]) - .defaultsTo(classOf[StringEncoder].getName) - val keyEncoderOpt = parser.accepts("key-serializer", "The class name of the message encoder implementation to use for serializing keys.") - .withRequiredArg - .describedAs("encoder_class") - .ofType(classOf[java.lang.String]) - .defaultsTo(classOf[StringEncoder].getName) - val messageReaderOpt = parser.accepts("line-reader", "The class name of the class to use for reading lines from standard in. " + - "By default each line is read as a separate message.") - .withRequiredArg - .describedAs("reader_class") - .ofType(classOf[java.lang.String]) - .defaultsTo(classOf[LineMessageReader].getName) - val socketBufferSizeOpt = parser.accepts("socket-buffer-size", "The size of the tcp RECV size.") - .withRequiredArg - .describedAs("size") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(1024*100) - val propertyOpt = parser.accepts("property", "A mechanism to pass user-defined properties in the form key=value to the message reader. 
" + - "This allows custom configuration for a user-defined message reader.") - .withRequiredArg - .describedAs("prop") - .ofType(classOf[String]) - - - val options = parser.parse(args : _*) - for(arg <- List(topicOpt, brokerListOpt)) { - if(!options.has(arg)) { - System.err.println("Missing required argument \"" + arg + "\"") - parser.printHelpOn(System.err) - System.exit(1) - } - } - - val topic = options.valueOf(topicOpt) - val brokerList = options.valueOf(brokerListOpt) - val sync = options.has(syncOpt) - val compress = options.has(compressOpt) - val batchSize = options.valueOf(batchSizeOpt) - val sendTimeout = options.valueOf(sendTimeoutOpt) - val queueSize = options.valueOf(queueSizeOpt) - val queueEnqueueTimeoutMs = options.valueOf(queueEnqueueTimeoutMsOpt) - val requestRequiredAcks = options.valueOf(requestRequiredAcksOpt) - val requestTimeoutMs = options.valueOf(requestTimeoutMsOpt) - val keyEncoderClass = options.valueOf(keyEncoderOpt) - val valueEncoderClass = options.valueOf(valueEncoderOpt) - val readerClass = options.valueOf(messageReaderOpt) - val socketBuffer = options.valueOf(socketBufferSizeOpt) - val cmdLineProps = parseLineReaderArgs(options.valuesOf(propertyOpt)) - cmdLineProps.put("topic", topic) - - val props = new Properties() - props.put("metadata.broker.list", brokerList) - val codec = if(compress) DefaultCompressionCodec.codec else NoCompressionCodec.codec - props.put("compression.codec", codec.toString) - props.put("producer.type", if(sync) "sync" else "async") - if(options.has(batchSizeOpt)) - props.put("batch.num.messages", batchSize.toString) - - props.put("message.send.max.retries", options.valueOf(messageSendMaxRetriesOpt).toString) - props.put("retry.backoff.ms", options.valueOf(retryBackoffMsOpt).toString) - props.put("queue.buffering.max.ms", sendTimeout.toString) - props.put("queue.buffering.max.messages", queueSize.toString) - props.put("queue.enqueue.timeout.ms", queueEnqueueTimeoutMs.toString) - props.put("request.required.acks", requestRequiredAcks.toString) - props.put("request.timeout.ms", requestTimeoutMs.toString) - props.put("key.serializer.class", keyEncoderClass) - props.put("serializer.class", valueEncoderClass) - props.put("send.buffer.bytes", socketBuffer.toString) - val reader = Class.forName(readerClass).newInstance().asInstanceOf[MessageReader[AnyRef, AnyRef]] - reader.init(System.in, cmdLineProps) - - try { - val producer = new Producer[AnyRef, AnyRef](new ProducerConfig(props)) - - Runtime.getRuntime.addShutdownHook(new Thread() { - override def run() { - producer.close() - } - }) - - var message: KeyedMessage[AnyRef, AnyRef] = null - do { - message = reader.readMessage() - if(message != null) - producer.send(message) - } while(message != null) - } catch { - case e: Exception => - e.printStackTrace - System.exit(1) - } - System.exit(0) - } - - def parseLineReaderArgs(args: Iterable[String]): Properties = { - val splits = args.map(_ split "=").filterNot(_ == null).filterNot(_.length == 0) - if(!splits.forall(_.length == 2)) { - System.err.println("Invalid line reader properties: " + args.mkString(" ")) - System.exit(1) - } - val props = new Properties - for(a <- splits) - props.put(a(0), a(1)) - props - } - - trait MessageReader[K,V] { - def init(inputStream: InputStream, props: Properties) {} - def readMessage(): KeyedMessage[K,V] - def close() {} - } - - class LineMessageReader extends MessageReader[String, String] { - var topic: String = null - var reader: BufferedReader = null - var parseKey = false - var keySeparator = "\t" - var 
ignoreError = false - var lineNumber = 0 - - override def init(inputStream: InputStream, props: Properties) { - topic = props.getProperty("topic") - if(props.containsKey("parse.key")) - parseKey = props.getProperty("parse.key").trim.toLowerCase.equals("true") - if(props.containsKey("key.separator")) - keySeparator = props.getProperty("key.separator") - if(props.containsKey("ignore.error")) - ignoreError = props.getProperty("ignore.error").trim.toLowerCase.equals("true") - reader = new BufferedReader(new InputStreamReader(inputStream)) - } - - override def readMessage() = { - lineNumber += 1 - (reader.readLine(), parseKey) match { - case (null, _) => null - case (line, true) => - line.indexOf(keySeparator) match { - case -1 => - if(ignoreError) - new KeyedMessage(topic, line) - else - throw new KafkaException("No key found on line " + lineNumber + ": " + line) - case n => - new KeyedMessage(topic, - line.substring(0, n), - if(n + keySeparator.size > line.size) "" else line.substring(n + keySeparator.size)) - } - case (line, false) => - new KeyedMessage(topic, line) - } - } - } -} diff --git a/core/src/main/scala/kafka/producer/KafkaLog4jAppender.scala b/core/src/main/scala/kafka/producer/KafkaLog4jAppender.scala index 42239b27ab267..e194942492324 100644 --- a/core/src/main/scala/kafka/producer/KafkaLog4jAppender.scala +++ b/core/src/main/scala/kafka/producer/KafkaLog4jAppender.scala @@ -23,74 +23,53 @@ import org.apache.log4j.AppenderSkeleton import org.apache.log4j.helpers.LogLog import kafka.utils.Logging import java.util.{Properties, Date} +import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} class KafkaLog4jAppender extends AppenderSkeleton with Logging { - var topic:String = null - var serializerClass:String = null - var brokerList:String = null - var producerType:String = null - var compressionCodec:String = null - var enqueueTimeout:String = null - var queueSize:String = null + var topic: String = null + var brokerList: String = null + var compressionType: String = null var requiredNumAcks: Int = Int.MaxValue + var syncSend: Boolean = false - private var producer: Producer[String, String] = null + private var producer: KafkaProducer[Array[Byte],Array[Byte]] = null - def getTopic:String = topic + def getTopic: String = topic def setTopic(topic: String) { this.topic = topic } - def getBrokerList:String = brokerList + def getBrokerList: String = brokerList def setBrokerList(brokerList: String) { this.brokerList = brokerList } - def getSerializerClass:String = serializerClass - def setSerializerClass(serializerClass:String) { this.serializerClass = serializerClass } + def getCompressionType: String = compressionType + def setCompressionType(compressionType: String) { this.compressionType = compressionType } - def getProducerType:String = producerType - def setProducerType(producerType:String) { this.producerType = producerType } + def getRequiredNumAcks: Int = requiredNumAcks + def setRequiredNumAcks(requiredNumAcks: Int) { this.requiredNumAcks = requiredNumAcks } - def getCompressionCodec:String = compressionCodec - def setCompressionCodec(compressionCodec:String) { this.compressionCodec = compressionCodec } - - def getEnqueueTimeout:String = enqueueTimeout - def setEnqueueTimeout(enqueueTimeout:String) { this.enqueueTimeout = enqueueTimeout } - - def getQueueSize:String = queueSize - def setQueueSize(queueSize:String) { this.queueSize = queueSize } - - def getRequiredNumAcks:Int = requiredNumAcks - def setRequiredNumAcks(requiredNumAcks:Int) { this.requiredNumAcks = 
requiredNumAcks } + def getSyncSend: Boolean = syncSend + def setSyncSend(syncSend: Boolean) { this.syncSend = syncSend } override def activateOptions() { // check for config parameter validity val props = new Properties() if(brokerList != null) - props.put("metadata.broker.list", brokerList) + props.put(org.apache.kafka.clients.producer.ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList) if(props.isEmpty) - throw new MissingConfigException("The metadata.broker.list property should be specified") + throw new MissingConfigException("The bootstrap servers property should be specified") if(topic == null) throw new MissingConfigException("topic must be specified by the Kafka log4j appender") - if(serializerClass == null) { - serializerClass = "kafka.serializer.StringEncoder" - LogLog.debug("Using default encoder - kafka.serializer.StringEncoder") - } - props.put("serializer.class", serializerClass) - //These have default values in ProducerConfig and AsyncProducerConfig. We don't care if they're not specified - if(producerType != null) props.put("producer.type", producerType) - if(compressionCodec != null) props.put("compression.codec", compressionCodec) - if(enqueueTimeout != null) props.put("queue.enqueue.timeout.ms", enqueueTimeout) - if(queueSize != null) props.put("queue.buffering.max.messages", queueSize) - if(requiredNumAcks != Int.MaxValue) props.put("request.required.acks", requiredNumAcks.toString) - val config : ProducerConfig = new ProducerConfig(props) - producer = new Producer[String, String](config) - LogLog.debug("Kafka producer connected to " + config.brokerList) + if(compressionType != null) props.put(org.apache.kafka.clients.producer.ProducerConfig.COMPRESSION_TYPE_CONFIG, compressionType) + if(requiredNumAcks != Int.MaxValue) props.put(org.apache.kafka.clients.producer.ProducerConfig.ACKS_CONFIG, requiredNumAcks.toString) + producer = new KafkaProducer[Array[Byte],Array[Byte]](props) + LogLog.debug("Kafka producer connected to " + brokerList) LogLog.debug("Logging for topic: " + topic) } override def append(event: LoggingEvent) { val message = subAppend(event) LogLog.debug("[" + new Date(event.getTimeStamp).toString + "]" + message) - val messageData = new KeyedMessage[String, String](topic, message) - producer.send(messageData); + val response = producer.send(new ProducerRecord[Array[Byte],Array[Byte]](topic, message.getBytes())) + if (syncSend) response.get } def subAppend(event: LoggingEvent): String = { @@ -107,5 +86,5 @@ class KafkaLog4jAppender extends AppenderSkeleton with Logging { } } - override def requiresLayout: Boolean = false + override def requiresLayout: Boolean = true } diff --git a/core/src/main/scala/kafka/producer/Producer.scala b/core/src/main/scala/kafka/producer/Producer.scala index 4798481d573bb..e38d2fa7ec873 100644 --- a/core/src/main/scala/kafka/producer/Producer.scala +++ b/core/src/main/scala/kafka/producer/Producer.scala @@ -16,14 +16,14 @@ */ package kafka.producer -import async.{DefaultEventHandler, ProducerSendThread, EventHandler} -import kafka.utils._ -import java.util.Random -import java.util.concurrent.{TimeUnit, LinkedBlockingQueue} -import kafka.serializer.Encoder import java.util.concurrent.atomic.AtomicBoolean -import kafka.common.QueueFullException +import java.util.concurrent.{LinkedBlockingQueue, TimeUnit} + +import kafka.common.{AppInfo, QueueFullException} import kafka.metrics._ +import kafka.producer.async.{DefaultEventHandler, EventHandler, ProducerSendThread} +import kafka.serializer.Encoder +import kafka.utils._ class 
Producer[K,V](val config: ProducerConfig, @@ -53,6 +53,7 @@ class Producer[K,V](val config: ProducerConfig, private val producerTopicStats = ProducerTopicStatsRegistry.getProducerTopicStats(config.clientId) KafkaMetricsReporter.startReporters(config.props) + AppInfo.registerInfo() def this(config: ProducerConfig) = this(config, @@ -126,9 +127,12 @@ class Producer[K,V](val config: ProducerConfig, val canShutdown = hasShutdown.compareAndSet(false, true) if(canShutdown) { info("Shutting down producer") + val startTime = System.nanoTime() + KafkaMetricsGroup.removeAllProducerMetrics(config.clientId) if (producerSendThread != null) producerSendThread.shutdown eventHandler.close + info("Producer shutdown completed in " + (System.nanoTime() - startTime) / 1000000 + " ms") } } } diff --git a/core/src/main/scala/kafka/producer/ProducerConfig.scala b/core/src/main/scala/kafka/producer/ProducerConfig.scala index 7947b18aceb29..3cdf23dce3407 100644 --- a/core/src/main/scala/kafka/producer/ProducerConfig.scala +++ b/core/src/main/scala/kafka/producer/ProducerConfig.scala @@ -77,16 +77,7 @@ class ProducerConfig private (val props: VerifiableProperties) * This parameter allows you to specify the compression codec for all data generated * * by this producer. The default is NoCompressionCodec */ - val compressionCodec = { - val prop = props.getString("compression.codec", NoCompressionCodec.name) - try { - CompressionCodec.getCompressionCodec(prop.toInt) - } - catch { - case nfe: NumberFormatException => - CompressionCodec.getCompressionCodec(prop) - } - } + val compressionCodec = props.getCompressionCodec("compression.codec", NoCompressionCodec) /** This parameter allows you to set whether compression should be turned * * on for particular topics diff --git a/core/src/main/scala/kafka/producer/ProducerRequestStats.scala b/core/src/main/scala/kafka/producer/ProducerRequestStats.scala index 96942205a6a46..026e93a2f1dcc 100644 --- a/core/src/main/scala/kafka/producer/ProducerRequestStats.scala +++ b/core/src/main/scala/kafka/producer/ProducerRequestStats.scala @@ -19,11 +19,16 @@ package kafka.producer import kafka.metrics.{KafkaTimer, KafkaMetricsGroup} import java.util.concurrent.TimeUnit import kafka.utils.Pool -import kafka.common.ClientIdAndBroker +import kafka.common.{ClientIdAllBrokers, ClientIdBroker, ClientIdAndBroker} -class ProducerRequestMetrics(metricId: ClientIdAndBroker) extends KafkaMetricsGroup { - val requestTimer = new KafkaTimer(newTimer(metricId + "ProducerRequestRateAndTimeMs", TimeUnit.MILLISECONDS, TimeUnit.SECONDS)) - val requestSizeHist = newHistogram(metricId + "ProducerRequestSize") +class ProducerRequestMetrics(metricId: ClientIdBroker) extends KafkaMetricsGroup { + val tags = metricId match { + case ClientIdAndBroker(clientId, brokerHost, brokerPort) => Map("clientId" -> clientId, "brokerHost" -> brokerHost, "brokerPort" -> brokerPort.toString) + case ClientIdAllBrokers(clientId) => Map("clientId" -> clientId) + } + + val requestTimer = new KafkaTimer(newTimer("ProducerRequestRateAndTimeMs", TimeUnit.MILLISECONDS, TimeUnit.SECONDS, tags)) + val requestSizeHist = newHistogram("ProducerRequestSize", biased = true, tags) } /** @@ -31,14 +36,14 @@ class ProducerRequestMetrics(metricId: ClientIdAndBroker) extends KafkaMetricsGr * @param clientId ClientId of the given producer */ class ProducerRequestStats(clientId: String) { - private val valueFactory = (k: ClientIdAndBroker) => new ProducerRequestMetrics(k) - private val stats = new Pool[ClientIdAndBroker, 
ProducerRequestMetrics](Some(valueFactory)) - private val allBrokersStats = new ProducerRequestMetrics(new ClientIdAndBroker(clientId, "AllBrokers")) + private val valueFactory = (k: ClientIdBroker) => new ProducerRequestMetrics(k) + private val stats = new Pool[ClientIdBroker, ProducerRequestMetrics](Some(valueFactory)) + private val allBrokersStats = new ProducerRequestMetrics(new ClientIdAllBrokers(clientId)) def getProducerRequestAllBrokersStats(): ProducerRequestMetrics = allBrokersStats - def getProducerRequestStats(brokerInfo: String): ProducerRequestMetrics = { - stats.getAndMaybePut(new ClientIdAndBroker(clientId, brokerInfo + "-")) + def getProducerRequestStats(brokerHost: String, brokerPort: Int): ProducerRequestMetrics = { + stats.getAndMaybePut(new ClientIdAndBroker(clientId, brokerHost, brokerPort)) } } @@ -52,5 +57,9 @@ object ProducerRequestStatsRegistry { def getProducerRequestStats(clientId: String) = { globalStats.getAndMaybePut(clientId) } + + def removeProducerRequestStats(clientId: String) { + globalStats.remove(clientId) + } } diff --git a/core/src/main/scala/kafka/producer/ProducerStats.scala b/core/src/main/scala/kafka/producer/ProducerStats.scala index e1610d3c602fb..1d0fa888c99a5 100644 --- a/core/src/main/scala/kafka/producer/ProducerStats.scala +++ b/core/src/main/scala/kafka/producer/ProducerStats.scala @@ -21,9 +21,10 @@ import java.util.concurrent.TimeUnit import kafka.utils.Pool class ProducerStats(clientId: String) extends KafkaMetricsGroup { - val serializationErrorRate = newMeter(clientId + "-SerializationErrorsPerSec", "errors", TimeUnit.SECONDS) - val resendRate = newMeter(clientId + "-ResendsPerSec", "resends", TimeUnit.SECONDS) - val failedSendRate = newMeter(clientId + "-FailedSendsPerSec", "failed sends", TimeUnit.SECONDS) + val tags: Map[String, String] = Map("clientId" -> clientId) + val serializationErrorRate = newMeter("SerializationErrorsPerSec", "errors", TimeUnit.SECONDS, tags) + val resendRate = newMeter("ResendsPerSec", "resends", TimeUnit.SECONDS, tags) + val failedSendRate = newMeter("FailedSendsPerSec", "failed sends", TimeUnit.SECONDS, tags) } /** @@ -36,4 +37,8 @@ object ProducerStatsRegistry { def getProducerStats(clientId: String) = { statsRegistry.getAndMaybePut(clientId) } + + def removeProducerStats(clientId: String) { + statsRegistry.remove(clientId) + } } diff --git a/core/src/main/scala/kafka/producer/ProducerTopicStats.scala b/core/src/main/scala/kafka/producer/ProducerTopicStats.scala index ed209f4773ded..97594c8313672 100644 --- a/core/src/main/scala/kafka/producer/ProducerTopicStats.scala +++ b/core/src/main/scala/kafka/producer/ProducerTopicStats.scala @@ -17,16 +17,21 @@ package kafka.producer import kafka.metrics.KafkaMetricsGroup -import kafka.common.ClientIdAndTopic +import kafka.common.{ClientIdTopic, ClientIdAllTopics, ClientIdAndTopic} import kafka.utils.{Pool, threadsafe} import java.util.concurrent.TimeUnit @threadsafe -class ProducerTopicMetrics(metricId: ClientIdAndTopic) extends KafkaMetricsGroup { - val messageRate = newMeter(metricId + "MessagesPerSec", "messages", TimeUnit.SECONDS) - val byteRate = newMeter(metricId + "BytesPerSec", "bytes", TimeUnit.SECONDS) - val droppedMessageRate = newMeter(metricId + "DroppedMessagesPerSec", "drops", TimeUnit.SECONDS) +class ProducerTopicMetrics(metricId: ClientIdTopic) extends KafkaMetricsGroup { + val tags = metricId match { + case ClientIdAndTopic(clientId, topic) => Map("clientId" -> clientId, "topic" -> topic) + case ClientIdAllTopics(clientId) => Map("clientId" 
-> clientId) + } + + val messageRate = newMeter("MessagesPerSec", "messages", TimeUnit.SECONDS, tags) + val byteRate = newMeter("BytesPerSec", "bytes", TimeUnit.SECONDS, tags) + val droppedMessageRate = newMeter("DroppedMessagesPerSec", "drops", TimeUnit.SECONDS, tags) } /** @@ -34,14 +39,14 @@ class ProducerTopicMetrics(metricId: ClientIdAndTopic) extends KafkaMetricsGroup * @param clientId The clientId of the given producer client. */ class ProducerTopicStats(clientId: String) { - private val valueFactory = (k: ClientIdAndTopic) => new ProducerTopicMetrics(k) - private val stats = new Pool[ClientIdAndTopic, ProducerTopicMetrics](Some(valueFactory)) - private val allTopicsStats = new ProducerTopicMetrics(new ClientIdAndTopic(clientId, "AllTopics")) // to differentiate from a topic named AllTopics + private val valueFactory = (k: ClientIdTopic) => new ProducerTopicMetrics(k) + private val stats = new Pool[ClientIdTopic, ProducerTopicMetrics](Some(valueFactory)) + private val allTopicsStats = new ProducerTopicMetrics(new ClientIdAllTopics(clientId)) // to differentiate from a topic named AllTopics def getProducerAllTopicsStats(): ProducerTopicMetrics = allTopicsStats def getProducerTopicStats(topic: String): ProducerTopicMetrics = { - stats.getAndMaybePut(new ClientIdAndTopic(clientId, topic + "-")) + stats.getAndMaybePut(new ClientIdAndTopic(clientId, topic)) } } @@ -55,4 +60,8 @@ object ProducerTopicStatsRegistry { def getProducerTopicStats(clientId: String) = { globalStats.getAndMaybePut(clientId) } + + def removeProducerTopicStats(clientId: String) { + globalStats.remove(clientId) + } } diff --git a/core/src/main/scala/kafka/producer/SyncProducer.scala b/core/src/main/scala/kafka/producer/SyncProducer.scala index 041cfa59c18fa..0f09951329a8a 100644 --- a/core/src/main/scala/kafka/producer/SyncProducer.scala +++ b/core/src/main/scala/kafka/producer/SyncProducer.scala @@ -5,7 +5,7 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. 
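The producer metrics above follow the same pattern as the broker-side ones: the clientId (and topic, when present) becomes a tag map rather than part of the metric name, with a sealed hierarchy distinguishing per-topic metrics from the all-topics aggregate. The sketch below shows that pattern; the local case classes only mirror kafka.common.ClientIdTopic and friends so the example compiles on its own.

```scala
// Self-contained mirror of the tag-map construction used for producer topic
// metrics: per-topic metrics carry clientId and topic tags, the aggregate
// carries only clientId, and the metric name itself stays constant.
sealed trait ClientIdTopic
case class ClientIdAndTopic(clientId: String, topic: String) extends ClientIdTopic
case class ClientIdAllTopics(clientId: String) extends ClientIdTopic

object ProducerMetricTagsExample {
  def tagsFor(id: ClientIdTopic): Map[String, String] = id match {
    case ClientIdAndTopic(clientId, topic) => Map("clientId" -> clientId, "topic" -> topic)
    case ClientIdAllTopics(clientId)       => Map("clientId" -> clientId)
  }

  def main(args: Array[String]): Unit = {
    println(tagsFor(ClientIdAndTopic("console-producer", "test")))
    println(tagsFor(ClientIdAllTopics("console-producer")))
  }
}
```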
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -22,6 +22,8 @@ import kafka.network.{BlockingChannel, BoundedByteBufferSend, Receive} import kafka.utils._ import java.util.Random +import org.apache.kafka.common.utils.Utils._ + object SyncProducer { val RequestKey: Short = 0 val randomGenerator = new Random @@ -37,10 +39,9 @@ class SyncProducer(val config: SyncProducerConfig) extends Logging { @volatile private var shutdown: Boolean = false private val blockingChannel = new BlockingChannel(config.host, config.port, BlockingChannel.UseDefaultBufferSize, config.sendBufferBytes, config.requestTimeoutMs) - val brokerInfo = "host_%s-port_%s".format(config.host, config.port) val producerRequestStats = ProducerRequestStatsRegistry.getProducerRequestStats(config.clientId) - trace("Instantiating Scala Sync Producer") + trace("Instantiating Scala Sync Producer with properties: %s".format(config.props)) private def verifyRequest(request: RequestOrResponse) = { /** @@ -91,11 +92,11 @@ class SyncProducer(val config: SyncProducerConfig) extends Logging { */ def send(producerRequest: ProducerRequest): ProducerResponse = { val requestSize = producerRequest.sizeInBytes - producerRequestStats.getProducerRequestStats(brokerInfo).requestSizeHist.update(requestSize) + producerRequestStats.getProducerRequestStats(config.host, config.port).requestSizeHist.update(requestSize) producerRequestStats.getProducerRequestAllBrokersStats.requestSizeHist.update(requestSize) var response: Receive = null - val specificTimer = producerRequestStats.getProducerRequestStats(brokerInfo).requestTimer + val specificTimer = producerRequestStats.getProducerRequestStats(config.host, config.port).requestTimer val aggregateTimer = producerRequestStats.getProducerRequestAllBrokersStats.requestTimer aggregateTimer.time { specificTimer.time { @@ -126,24 +127,22 @@ class SyncProducer(val config: SyncProducerConfig) extends Logging { */ private def disconnect() { try { - if(blockingChannel.isConnected) { - info("Disconnecting from " + config.host + ":" + config.port) - blockingChannel.disconnect() - } + info("Disconnecting from " + formatAddress(config.host, config.port)) + blockingChannel.disconnect() } catch { case e: Exception => error("Error on disconnect: ", e) } } - + private def connect(): BlockingChannel = { if (!blockingChannel.isConnected && !shutdown) { try { blockingChannel.connect() - info("Connected to " + config.host + ":" + config.port + " for producing") + info("Connected to " + formatAddress(config.host, config.port) + " for producing") } catch { case e: Exception => { disconnect() - error("Producer connection to " + config.host + ":" + config.port + " unsuccessful", e) + error("Producer connection to " + formatAddress(config.host, config.port) + " unsuccessful", e) throw e } } @@ -156,5 +155,4 @@ class SyncProducer(val config: SyncProducerConfig) extends Logging { connect() } } -} - +} \ No newline at end of file diff --git a/core/src/main/scala/kafka/producer/SyncProducerConfig.scala b/core/src/main/scala/kafka/producer/SyncProducerConfig.scala index 69b2d0c11bb14..a08ce00a0aae7 100644 --- a/core/src/main/scala/kafka/producer/SyncProducerConfig.scala +++ b/core/src/main/scala/kafka/producer/SyncProducerConfig.scala @@ -42,11 +42,15 @@ trait SyncProducerConfigShared { val clientId = props.getString("client.id", SyncProducerConfig.DefaultClientId) /* - * The required acks of the producer requests - 
negative value means ack - * after the replicas in ISR have caught up to the leader's offset - * corresponding to this produce request. + * The number of acknowledgments the producer requires the leader to have received before considering a request complete. + * This controls the durability of the messages sent by the producer. + * + * request.required.acks = 0 - means the producer will not wait for any acknowledgement from the leader. + * request.required.acks = 1 - means the leader will write the message to its local log and immediately acknowledge + * request.required.acks = -1 - means the leader will wait for acknowledgement from all in-sync replicas before acknowledging the write */ - val requestRequiredAcks = props.getShort("request.required.acks", SyncProducerConfig.DefaultRequiredAcks) + + val requestRequiredAcks = props.getShortInRange("request.required.acks", SyncProducerConfig.DefaultRequiredAcks,(-1,1)) /* * The ack timeout of the producer requests. Value must be non-negative and non-zero @@ -59,4 +63,4 @@ object SyncProducerConfig { val DefaultClientId = "" val DefaultRequiredAcks : Short = 0 val DefaultAckTimeoutMs = 10000 -} \ No newline at end of file +} diff --git a/core/src/main/scala/kafka/producer/async/DefaultEventHandler.scala b/core/src/main/scala/kafka/producer/async/DefaultEventHandler.scala index d8ac915de31a2..821901e4f434d 100644 --- a/core/src/main/scala/kafka/producer/async/DefaultEventHandler.scala +++ b/core/src/main/scala/kafka/producer/async/DefaultEventHandler.scala @@ -95,34 +95,35 @@ class DefaultEventHandler[K,V](config: ProducerConfig, val partitionedDataOpt = partitionAndCollate(messages) partitionedDataOpt match { case Some(partitionedData) => - val failedProduceRequests = new ArrayBuffer[KeyedMessage[K,Message]] - try { - for ((brokerid, messagesPerBrokerMap) <- partitionedData) { - if (logger.isTraceEnabled) - messagesPerBrokerMap.foreach(partitionAndEvent => - trace("Handling event for Topic: %s, Broker: %d, Partitions: %s".format(partitionAndEvent._1, brokerid, partitionAndEvent._2))) - val messageSetPerBroker = groupMessagesToSet(messagesPerBrokerMap) - - val failedTopicPartitions = send(brokerid, messageSetPerBroker) - failedTopicPartitions.foreach(topicPartition => { - messagesPerBrokerMap.get(topicPartition) match { - case Some(data) => failedProduceRequests.appendAll(data) - case None => // nothing - } - }) + val failedProduceRequests = new ArrayBuffer[KeyedMessage[K, Message]] + for ((brokerid, messagesPerBrokerMap) <- partitionedData) { + if (logger.isTraceEnabled) { + messagesPerBrokerMap.foreach(partitionAndEvent => + trace("Handling event for Topic: %s, Broker: %d, Partitions: %s".format(partitionAndEvent._1, brokerid, partitionAndEvent._2))) + } + val messageSetPerBrokerOpt = groupMessagesToSet(messagesPerBrokerMap) + messageSetPerBrokerOpt match { + case Some(messageSetPerBroker) => + val failedTopicPartitions = send(brokerid, messageSetPerBroker) + failedTopicPartitions.foreach(topicPartition => { + messagesPerBrokerMap.get(topicPartition) match { + case Some(data) => failedProduceRequests.appendAll(data) + case None => // nothing + } + }) + case None => // failed to group messages + messagesPerBrokerMap.values.foreach(m => failedProduceRequests.appendAll(m)) } - } catch { - case t: Throwable => error("Failed to send messages", t) } failedProduceRequests - case None => // all produce requests failed + case None => // failed to collate messages messages } } def serialize(events: Seq[KeyedMessage[K,V]]): Seq[KeyedMessage[K,Message]] = 
{ val serializedMessages = new ArrayBuffer[KeyedMessage[K,Message]](events.size) - events.map{e => + events.foreach{e => try { if(e.hasKey) serializedMessages += new KeyedMessage[K,Message](topic = e.topic, key = e.key, partKey = e.partKey, message = new Message(key = keyEncoder.toBytes(e.key), bytes = encoder.toBytes(e.message))) @@ -290,43 +291,46 @@ class DefaultEventHandler[K,V](config: ProducerConfig, } } - private def groupMessagesToSet(messagesPerTopicAndPartition: collection.mutable.Map[TopicAndPartition, Seq[KeyedMessage[K,Message]]]) = { + private def groupMessagesToSet(messagesPerTopicAndPartition: collection.mutable.Map[TopicAndPartition, Seq[KeyedMessage[K, Message]]]) = { /** enforce the compressed.topics config here. - * If the compression codec is anything other than NoCompressionCodec, - * Enable compression only for specified topics if any - * If the list of compressed topics is empty, then enable the specified compression codec for all topics - * If the compression codec is NoCompressionCodec, compression is disabled for all topics + * If the compression codec is anything other than NoCompressionCodec, + * Enable compression only for specified topics if any + * If the list of compressed topics is empty, then enable the specified compression codec for all topics + * If the compression codec is NoCompressionCodec, compression is disabled for all topics */ - - val messagesPerTopicPartition = messagesPerTopicAndPartition.map { case (topicAndPartition, messages) => - val rawMessages = messages.map(_.message) - ( topicAndPartition, - config.compressionCodec match { - case NoCompressionCodec => - debug("Sending %d messages with no compression to %s".format(messages.size, topicAndPartition)) - new ByteBufferMessageSet(NoCompressionCodec, rawMessages: _*) - case _ => - config.compressedTopics.size match { - case 0 => - debug("Sending %d messages with compression codec %d to %s" - .format(messages.size, config.compressionCodec.codec, topicAndPartition)) - new ByteBufferMessageSet(config.compressionCodec, rawMessages: _*) - case _ => - if(config.compressedTopics.contains(topicAndPartition.topic)) { + try { + val messagesPerTopicPartition = messagesPerTopicAndPartition.map { case (topicAndPartition, messages) => + val rawMessages = messages.map(_.message) + (topicAndPartition, + config.compressionCodec match { + case NoCompressionCodec => + debug("Sending %d messages with no compression to %s".format(messages.size, topicAndPartition)) + new ByteBufferMessageSet(NoCompressionCodec, rawMessages: _*) + case _ => + config.compressedTopics.size match { + case 0 => debug("Sending %d messages with compression codec %d to %s" .format(messages.size, config.compressionCodec.codec, topicAndPartition)) new ByteBufferMessageSet(config.compressionCodec, rawMessages: _*) - } - else { - debug("Sending %d messages to %s with no compression as it is not in compressed.topics - %s" - .format(messages.size, topicAndPartition, config.compressedTopics.toString)) - new ByteBufferMessageSet(NoCompressionCodec, rawMessages: _*) - } - } - } - ) + case _ => + if (config.compressedTopics.contains(topicAndPartition.topic)) { + debug("Sending %d messages with compression codec %d to %s" + .format(messages.size, config.compressionCodec.codec, topicAndPartition)) + new ByteBufferMessageSet(config.compressionCodec, rawMessages: _*) + } + else { + debug("Sending %d messages to %s with no compression as it is not in compressed.topics - %s" + .format(messages.size, topicAndPartition, config.compressedTopics.toString)) 
+ new ByteBufferMessageSet(NoCompressionCodec, rawMessages: _*) + } + } + } + ) + } + Some(messagesPerTopicPartition) + } catch { + case t: Throwable => error("Failed to group messages", t); None } - messagesPerTopicPartition } def close() { diff --git a/core/src/main/scala/kafka/producer/async/ProducerSendThread.scala b/core/src/main/scala/kafka/producer/async/ProducerSendThread.scala index 42e9c741c2dce..2ccf82a6f1ed6 100644 --- a/core/src/main/scala/kafka/producer/async/ProducerSendThread.scala +++ b/core/src/main/scala/kafka/producer/async/ProducerSendThread.scala @@ -34,10 +34,11 @@ class ProducerSendThread[K,V](val threadName: String, private val shutdownLatch = new CountDownLatch(1) private val shutdownCommand = new KeyedMessage[K,V]("shutdown", null.asInstanceOf[K], null.asInstanceOf[V]) - newGauge(clientId + "-ProducerQueueSize", + newGauge("ProducerQueueSize", new Gauge[Int] { def value = queue.size - }) + }, + Map("clientId" -> clientId)) override def run { try { diff --git a/core/src/main/scala/kafka/server/AbstractFetcherManager.scala b/core/src/main/scala/kafka/server/AbstractFetcherManager.scala index 9390edf37dae9..20c00cb8cc235 100644 --- a/core/src/main/scala/kafka/server/AbstractFetcherManager.scala +++ b/core/src/main/scala/kafka/server/AbstractFetcherManager.scala @@ -26,7 +26,7 @@ import kafka.metrics.KafkaMetricsGroup import kafka.common.TopicAndPartition import com.yammer.metrics.core.Gauge -abstract class AbstractFetcherManager(protected val name: String, metricPrefix: String, numFetchers: Int = 1) +abstract class AbstractFetcherManager(protected val name: String, clientId: String, numFetchers: Int = 1) extends Logging with KafkaMetricsGroup { // map of (source broker_id, fetcher_id per source broker) => fetcher private val fetcherThreadMap = new mutable.HashMap[BrokerAndFetcherId, AbstractFetcherThread] @@ -34,7 +34,7 @@ abstract class AbstractFetcherManager(protected val name: String, metricPrefix: this.logIdent = "[" + name + "] " newGauge( - metricPrefix + "-MaxLag", + "MaxLag", new Gauge[Long] { // current max lag across all fetchers/topics/partitions def value = fetcherThreadMap.foldLeft(0L)((curMaxAll, fetcherThreadMapEntry) => { @@ -42,24 +42,25 @@ abstract class AbstractFetcherManager(protected val name: String, metricPrefix: curMaxThread.max(fetcherLagStatsEntry._2.lag) }).max(curMaxAll) }) - } + }, + Map("clientId" -> clientId) ) newGauge( - metricPrefix + "-MinFetchRate", - { - new Gauge[Double] { - // current min fetch rate across all fetchers/topics/partitions - def value = { - val headRate: Double = - fetcherThreadMap.headOption.map(_._2.fetcherStats.requestRate.oneMinuteRate).getOrElse(0) - - fetcherThreadMap.foldLeft(headRate)((curMinAll, fetcherThreadMapEntry) => { - fetcherThreadMapEntry._2.fetcherStats.requestRate.oneMinuteRate.min(curMinAll) - }) - } + "MinFetchRate", { + new Gauge[Double] { + // current min fetch rate across all fetchers/topics/partitions + def value = { + val headRate: Double = + fetcherThreadMap.headOption.map(_._2.fetcherStats.requestRate.oneMinuteRate).getOrElse(0) + + fetcherThreadMap.foldLeft(headRate)((curMinAll, fetcherThreadMapEntry) => { + fetcherThreadMapEntry._2.fetcherStats.requestRate.oneMinuteRate.min(curMinAll) + }) } } + }, + Map("clientId" -> clientId) ) private def getFetcherId(topic: String, partitionId: Int) : Int = { diff --git a/core/src/main/scala/kafka/server/AbstractFetcherThread.scala b/core/src/main/scala/kafka/server/AbstractFetcherThread.scala index db7017b8a8ff0..8c281d4668f92 100644 --- 
a/core/src/main/scala/kafka/server/AbstractFetcherThread.scala +++ b/core/src/main/scala/kafka/server/AbstractFetcherThread.scala @@ -18,21 +18,20 @@ package kafka.server import kafka.cluster.Broker -import collection.mutable -import scala.collection.Set -import scala.collection.Map -import kafka.message.{InvalidMessageException, ByteBufferMessageSet, MessageAndOffset} -import kafka.metrics.KafkaMetricsGroup -import com.yammer.metrics.core.Gauge import kafka.utils.{Pool, ShutdownableThread} import kafka.consumer.{PartitionTopicInfo, SimpleConsumer} import kafka.api.{FetchRequest, FetchResponse, FetchResponsePartitionData, FetchRequestBuilder} import kafka.common.{KafkaException, ClientIdAndBroker, TopicAndPartition, ErrorMapping} import kafka.utils.Utils.inLock +import kafka.message.{InvalidMessageException, ByteBufferMessageSet, MessageAndOffset} +import kafka.metrics.KafkaMetricsGroup + +import scala.collection.{mutable, Set, Map} import java.util.concurrent.TimeUnit import java.util.concurrent.locks.ReentrantLock import java.util.concurrent.atomic.AtomicLong +import com.yammer.metrics.core.Gauge /** * Abstract class for fetching data from multiple partitions from the same broker. @@ -45,8 +44,7 @@ abstract class AbstractFetcherThread(name: String, clientId: String, sourceBroke private val partitionMapLock = new ReentrantLock private val partitionMapCond = partitionMapLock.newCondition() val simpleConsumer = new SimpleConsumer(sourceBroker.host, sourceBroker.port, socketTimeout, socketBufferSize, clientId) - private val brokerInfo = "host_%s-port_%s".format(sourceBroker.host, sourceBroker.port) - private val metricId = new ClientIdAndBroker(clientId, brokerInfo) + private val metricId = new ClientIdAndBroker(clientId, sourceBroker.host, sourceBroker.port) val fetcherStats = new FetcherStats(metricId) val fetcherLagStats = new FetcherLagStats(metricId) val fetchRequestBuilder = new FetchRequestBuilder(). @@ -92,12 +90,12 @@ abstract class AbstractFetcherThread(name: String, clientId: String, sourceBroke val partitionsWithError = new mutable.HashSet[TopicAndPartition] var response: FetchResponse = null try { - trace("issuing to broker %d of fetch request %s".format(sourceBroker.id, fetchRequest)) + trace("Issuing to broker %d of fetch request %s".format(sourceBroker.id, fetchRequest)) response = simpleConsumer.fetch(fetchRequest) } catch { case t: Throwable => if (isRunning.get) { - error("Error in fetch %s".format(fetchRequest), t) + warn("Error in fetch %s. 
Possible cause: %s".format(fetchRequest, t.toString)) partitionMapLock synchronized { partitionsWithError ++= partitionMap.keys } @@ -203,13 +201,15 @@ abstract class AbstractFetcherThread(name: String, clientId: String, sourceBroke } } -class FetcherLagMetrics(metricId: ClientIdBrokerTopicPartition) extends KafkaMetricsGroup { +class FetcherLagMetrics(metricId: ClientIdTopicPartition) extends KafkaMetricsGroup { private[this] val lagVal = new AtomicLong(-1L) - newGauge( - metricId + "-ConsumerLag", + newGauge("ConsumerLag", new Gauge[Long] { def value = lagVal.get - } + }, + Map("clientId" -> metricId.clientId, + "topic" -> metricId.topic, + "partition" -> metricId.partitionId.toString) ) def lag_=(newLag: Long) { @@ -220,20 +220,25 @@ class FetcherLagMetrics(metricId: ClientIdBrokerTopicPartition) extends KafkaMet } class FetcherLagStats(metricId: ClientIdAndBroker) { - private val valueFactory = (k: ClientIdBrokerTopicPartition) => new FetcherLagMetrics(k) - val stats = new Pool[ClientIdBrokerTopicPartition, FetcherLagMetrics](Some(valueFactory)) + private val valueFactory = (k: ClientIdTopicPartition) => new FetcherLagMetrics(k) + val stats = new Pool[ClientIdTopicPartition, FetcherLagMetrics](Some(valueFactory)) def getFetcherLagStats(topic: String, partitionId: Int): FetcherLagMetrics = { - stats.getAndMaybePut(new ClientIdBrokerTopicPartition(metricId.clientId, metricId.brokerInfo, topic, partitionId)) + stats.getAndMaybePut(new ClientIdTopicPartition(metricId.clientId, topic, partitionId)) } } class FetcherStats(metricId: ClientIdAndBroker) extends KafkaMetricsGroup { - val requestRate = newMeter(metricId + "-RequestsPerSec", "requests", TimeUnit.SECONDS) - val byteRate = newMeter(metricId + "-BytesPerSec", "bytes", TimeUnit.SECONDS) + val tags = Map("clientId" -> metricId.clientId, + "brokerHost" -> metricId.brokerHost, + "brokerPort" -> metricId.brokerPort.toString) + + val requestRate = newMeter("RequestsPerSec", "requests", TimeUnit.SECONDS, tags) + + val byteRate = newMeter("BytesPerSec", "bytes", TimeUnit.SECONDS, tags) } -case class ClientIdBrokerTopicPartition(clientId: String, brokerInfo: String, topic: String, partitionId: Int) { - override def toString = "%s-%s-%s-%d".format(clientId, brokerInfo, topic, partitionId) +case class ClientIdTopicPartition(clientId: String, topic: String, partitionId: Int) { + override def toString = "%s-%s-%d".format(clientId, topic, partitionId) } diff --git a/core/src/main/scala/kafka/server/BrokerStates.scala b/core/src/main/scala/kafka/server/BrokerStates.scala new file mode 100644 index 0000000000000..e6ee77ebde5c2 --- /dev/null +++ b/core/src/main/scala/kafka/server/BrokerStates.scala @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
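The metric changes above all follow one pattern: instead of baking the client id, broker, topic and partition into the metric name (for example clientId + "-ProducerQueueSize" or metricId + "-RequestsPerSec"), the name stays fixed and the scope travels in an explicit tag map. A minimal sketch of a tagged registration, assuming only the newGauge and newMeter signatures visible in this diff; the ExampleStats class itself is invented for illustration:

    import java.util.concurrent.TimeUnit

    import com.yammer.metrics.core.Gauge
    import kafka.metrics.KafkaMetricsGroup

    // Illustrative only: a hypothetical stats holder registering tagged metrics
    // the same way FetcherStats and FetcherLagMetrics do above.
    class ExampleStats(clientId: String, brokerHost: String, brokerPort: Int) extends KafkaMetricsGroup {
      private val tags = Map("clientId"   -> clientId,
                             "brokerHost" -> brokerHost,
                             "brokerPort" -> brokerPort.toString)

      // The metric name stays fixed; the scope is carried by the tags.
      val requestRate = newMeter("RequestsPerSec", "requests", TimeUnit.SECONDS, tags)

      newGauge("QueueSize",
        new Gauge[Int] { def value = 0 },   // placeholder value for the sketch
        tags)
    }

With tags, downstream reporters can group or filter on clientId, topic or partition without parsing composite metric names back apart.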
+ */ + +package kafka.server + +/** + * Broker states are the possible state that a kafka broker can be in. + * A broker should be only in one state at a time. + * The expected state transition with the following defined states is: + * + * +-----------+ + * |Not Running| + * +-----+-----+ + * | + * v + * +-----+-----+ + * |Starting +--+ + * +-----+-----+ | +----+------------+ + * | +>+RecoveringFrom | + * v |UncleanShutdown | + * +----------+ +-----+-----+ +-------+---------+ + * |RunningAs | |RunningAs | | + * |Controller+<--->+Broker +<-----------+ + * +----------+ +-----+-----+ + * | | + * | v + * | +-----+------------+ + * |-----> |PendingControlled | + * |Shutdown | + * +-----+------------+ + * | + * v + * +-----+----------+ + * |BrokerShutting | + * |Down | + * +-----+----------+ + * | + * v + * +-----+-----+ + * |Not Running| + * +-----------+ + * + * Custom states is also allowed for cases where there are custom kafka states for different scenarios. + */ +sealed trait BrokerStates { def state: Byte } +case object NotRunning extends BrokerStates { val state: Byte = 0 } +case object Starting extends BrokerStates { val state: Byte = 1 } +case object RecoveringFromUncleanShutdown extends BrokerStates { val state: Byte = 2 } +case object RunningAsBroker extends BrokerStates { val state: Byte = 3 } +case object RunningAsController extends BrokerStates { val state: Byte = 4 } +case object PendingControlledShutdown extends BrokerStates { val state: Byte = 6 } +case object BrokerShuttingDown extends BrokerStates { val state: Byte = 7 } + + +case class BrokerState() { + @volatile var currentState: Byte = NotRunning.state + + def newState(newState: BrokerStates) { + this.newState(newState.state) + } + + // Allowing undefined custom state + def newState(newState: Byte) { + currentState = newState + } +} diff --git a/core/src/main/scala/kafka/server/DelayedFetch.scala b/core/src/main/scala/kafka/server/DelayedFetch.scala new file mode 100644 index 0000000000000..dd602ee2e65c2 --- /dev/null +++ b/core/src/main/scala/kafka/server/DelayedFetch.scala @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
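BrokerState is a plain mutable holder that the server updates as it walks through the lifecycle drawn above, and the raw-byte overload leaves room for custom states. A small usage sketch, with the transition order taken from the diagram and the custom byte value invented; how the broker itself drives these transitions is not shown in this diff:

    import kafka.server._

    object BrokerStateExample extends App {
      val brokerState = BrokerState()

      brokerState.newState(Starting)                      // startup begins
      brokerState.newState(RecoveringFromUncleanShutdown) // optional log recovery
      brokerState.newState(RunningAsBroker)               // normal operation

      // Deployments can publish their own states through the raw-byte overload.
      brokerState.newState(10: Byte)

      // Monitoring reads the current state as a byte.
      println(s"current state byte: ${brokerState.currentState}")

      brokerState.newState(PendingControlledShutdown)
      brokerState.newState(BrokerShuttingDown)
      brokerState.newState(NotRunning)
    }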
+ */ + +package kafka.server + +import kafka.api.FetchResponsePartitionData +import kafka.api.PartitionFetchInfo +import kafka.common.UnknownTopicOrPartitionException +import kafka.common.NotLeaderForPartitionException +import kafka.common.TopicAndPartition + +import scala.collection._ + +case class FetchPartitionStatus(startOffsetMetadata: LogOffsetMetadata, fetchInfo: PartitionFetchInfo) { + + override def toString = "[startOffsetMetadata: " + startOffsetMetadata + ", " + + "fetchInfo: " + fetchInfo + "]" +} + +/** + * The fetch metadata maintained by the delayed fetch operation + */ +case class FetchMetadata(fetchMinBytes: Int, + fetchOnlyLeader: Boolean, + fetchOnlyCommitted: Boolean, + fetchPartitionStatus: Map[TopicAndPartition, FetchPartitionStatus]) { + + override def toString = "[minBytes: " + fetchMinBytes + ", " + + "onlyLeader:" + fetchOnlyLeader + ", " + "onlyCommitted: " + fetchOnlyCommitted + ", " + "partitionStatus: " + fetchPartitionStatus + "]" +} +/** + * A delayed fetch operation that can be created by the replica manager and watched + * in the fetch operation purgatory + */ +class DelayedFetch(delayMs: Long, + fetchMetadata: FetchMetadata, + replicaManager: ReplicaManager, + responseCallback: Map[TopicAndPartition, FetchResponsePartitionData] => Unit) + extends DelayedOperation(delayMs) { + + /** + * The operation can be completed if: + * + * Case A: This broker is no longer the leader for some partitions it tries to fetch + * Case B: This broker does not know of some partitions it tries to fetch + * Case C: The fetch offset locates not on the last segment of the log + * Case D: The accumulated bytes from all the fetching partitions exceeds the minimum bytes + * + * Upon completion, should return whatever data is available for each valid partition + */ + override def tryComplete() : Boolean = { + var accumulatedSize = 0 + fetchMetadata.fetchPartitionStatus.foreach { + case (topicAndPartition, fetchStatus) => + val fetchOffset = fetchStatus.startOffsetMetadata + try { + if (fetchOffset != LogOffsetMetadata.UnknownOffsetMetadata) { + val replica = replicaManager.getLeaderReplicaIfLocal(topicAndPartition.topic, topicAndPartition.partition) + val endOffset = + if (fetchMetadata.fetchOnlyCommitted) + replica.highWatermark + else + replica.logEndOffset + + if (endOffset.offsetOnOlderSegment(fetchOffset)) { + // Case C, this can happen when the new fetch operation is on a truncated leader + debug("Satisfying fetch %s since it is fetching later segments of partition %s.".format(fetchMetadata, topicAndPartition)) + return forceComplete() + } else if (fetchOffset.offsetOnOlderSegment(endOffset)) { + // Case C, this can happen when the fetch operation is falling behind the current segment + // or the partition has just rolled a new segment + debug("Satisfying fetch %s immediately since it is fetching older segments.".format(fetchMetadata)) + return forceComplete() + } else if (fetchOffset.precedes(endOffset)) { + // we need take the partition fetch size as upper bound when accumulating the bytes + accumulatedSize += math.min(endOffset.positionDiff(fetchOffset), fetchStatus.fetchInfo.fetchSize) + } + } + } catch { + case utpe: UnknownTopicOrPartitionException => // Case B + debug("Broker no longer know of %s, satisfy %s immediately".format(topicAndPartition, fetchMetadata)) + return forceComplete() + case nle: NotLeaderForPartitionException => // Case A + debug("Broker is no longer the leader of %s, satisfy %s immediately".format(topicAndPartition, fetchMetadata)) + return 
forceComplete() + } + } + + // Case D + if (accumulatedSize >= fetchMetadata.fetchMinBytes) + forceComplete() + else + false + } + + /** + * Upon completion, read whatever data is available and pass to the complete callback + */ + override def onComplete() { + val logReadResults = replicaManager.readFromLocalLog(fetchMetadata.fetchOnlyLeader, + fetchMetadata.fetchOnlyCommitted, + fetchMetadata.fetchPartitionStatus.mapValues(status => status.fetchInfo)) + + val fetchPartitionData = logReadResults.mapValues(result => + FetchResponsePartitionData(result.errorCode, result.hw, result.info.messageSet)) + + responseCallback(fetchPartitionData) + } +} \ No newline at end of file diff --git a/core/src/main/scala/kafka/server/DelayedOperation.scala b/core/src/main/scala/kafka/server/DelayedOperation.scala new file mode 100644 index 0000000000000..fc06b01cad3a0 --- /dev/null +++ b/core/src/main/scala/kafka/server/DelayedOperation.scala @@ -0,0 +1,316 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.server + +import kafka.utils._ +import kafka.metrics.KafkaMetricsGroup + +import java.util +import java.util.concurrent._ +import java.util.concurrent.atomic._ +import scala.collection._ + +import com.yammer.metrics.core.Gauge + + +/** + * An operation whose processing needs to be delayed for at most the given delayMs. For example + * a delayed produce operation could be waiting for specified number of acks; or + * a delayed fetch operation could be waiting for a given number of bytes to accumulate. + * + * The logic upon completing a delayed operation is defined in onComplete() and will be called exactly once. + * Once an operation is completed, isCompleted() will return true. onComplete() can be triggered by either + * forceComplete(), which forces calling onComplete() after delayMs if the operation is not yet completed, + * or tryComplete(), which first checks if the operation can be completed or not now, and if yes calls + * forceComplete(). + * + * A subclass of DelayedOperation needs to provide an implementation of both onComplete() and tryComplete(). + */ +abstract class DelayedOperation(delayMs: Long) extends DelayedItem(delayMs) { + private val completed = new AtomicBoolean(false) + + /* + * Force completing the delayed operation, if not already completed. + * This function can be triggered when + * + * 1. The operation has been verified to be completable inside tryComplete() + * 2. 
The operation has expired and hence needs to be completed right now + * + * Return true iff the operation is completed by the caller: note that + * concurrent threads can try to complete the same operation, but only + * the first thread will succeed in completing the operation and return + * true, others will still return false + */ + def forceComplete(): Boolean = { + if (completed.compareAndSet(false, true)) { + onComplete() + true + } else { + false + } + } + + /** + * Check if the delayed operation is already completed + */ + def isCompleted(): Boolean = completed.get() + + /** + * Process for completing an operation; This function needs to be defined + * in subclasses and will be called exactly once in forceComplete() + */ + def onComplete(): Unit + + /* + * Try to complete the delayed operation by first checking if the operation + * can be completed by now. If yes execute the completion logic by calling + * forceComplete() and return true iff forceComplete returns true; otherwise return false + * + * This function needs to be defined in subclasses + */ + def tryComplete(): Boolean +} + +/** + * A helper purgatory class for bookkeeping delayed operations with a timeout, and expiring timed out operations. + */ +class DelayedOperationPurgatory[T <: DelayedOperation](brokerId: Int = 0, purgeInterval: Int = 1000) + extends Logging with KafkaMetricsGroup { + + /* a list of operation watching keys */ + private val watchersForKey = new Pool[Any, Watchers](Some((key: Any) => new Watchers)) + + /* background thread expiring operations that have timed out */ + private val expirationReaper = new ExpiredOperationReaper + + newGauge( + "PurgatorySize", + new Gauge[Int] { + def value = watched() + } + ) + + newGauge( + "NumDelayedOperations", + new Gauge[Int] { + def value = delayed() + } + ) + + expirationReaper.start() + + /** + * Check if the operation can be completed, if not watch it based on the given watch keys + * + * Note that a delayed operation can be watched on multiple keys. It is possible that + * an operation is completed after it has been added to the watch list for some, but + * not all of the keys. In this case, the operation is considered completed and won't + * be added to the watch list of the remaining keys. The expiration reaper thread will + * remove this operation from any watcher list in which the operation exists. + * + * @param operation the delayed operation to be checked + * @param watchKeys keys for bookkeeping the operation + * @return true iff the delayed operations can be completed by the caller + */ + def tryCompleteElseWatch(operation: T, watchKeys: Seq[Any]): Boolean = { + for(key <- watchKeys) { + // if the operation is already completed, stopping adding it to + // any further lists and return false + if (operation.isCompleted()) + return false + val watchers = watchersFor(key) + // if the operation can by completed by myself, stop adding it to + // any further lists and return true immediately + if(operation synchronized operation.tryComplete()) { + return true + } else { + watchers.watch(operation) + } + } + + // if it cannot be completed by now and hence is watched, add to the expire queue also + if (! operation.isCompleted()) { + expirationReaper.enqueue(operation) + } + + false + } + + /** + * Check if some some delayed operations can be completed with the given watch key, + * and if yes complete them. 
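The contract is easiest to see with a toy subclass: tryComplete() checks its condition and, when satisfied, goes through forceComplete(), whose compare-and-set guarantees onComplete() runs exactly once even if a request handler and the expiration reaper race. A minimal sketch against the DelayedOperation API defined in this file; DelayedWaitForCount and its counter are invented for illustration:

    import java.util.concurrent.atomic.AtomicInteger
    import kafka.server.DelayedOperation

    /** Toy operation: completes once `counter` reaches `target`, or when it expires. */
    class DelayedWaitForCount(delayMs: Long, counter: AtomicInteger, target: Int)
      extends DelayedOperation(delayMs) {

      // Runs exactly once, whether triggered by tryComplete() or by the
      // expiration reaper force-completing the operation after delayMs.
      override def onComplete(): Unit =
        println(s"completed with count=${counter.get} (target=$target)")

      // Check the condition; complete only through forceComplete() so the
      // exactly-once guarantee holds under concurrent callers.
      override def tryComplete(): Boolean =
        if (counter.get >= target) forceComplete() else false
    }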
+ * + * @return the number of completed operations during this process + */ + def checkAndComplete(key: Any): Int = { + val watchers = watchersForKey.get(key) + if(watchers == null) + 0 + else + watchers.tryCompleteWatched() + } + + /** + * Return the total size of watch lists the purgatory. Since an operation may be watched + * on multiple lists, and some of its watched entries may still be in the watch lists + * even when it has been completed, this number may be larger than the number of real operations watched + */ + def watched() = watchersForKey.values.map(_.watched).sum + + /** + * Return the number of delayed operations in the expiry queue + */ + def delayed() = expirationReaper.delayed + + /* + * Return the watch list of the given key + */ + private def watchersFor(key: Any) = watchersForKey.getAndMaybePut(key) + + /** + * Shutdown the expire reaper thread + */ + def shutdown() { + expirationReaper.shutdown() + } + + /** + * A linked list of watched delayed operations based on some key + */ + private class Watchers { + private val operations = new util.LinkedList[T] + + def watched = operations.size() + + // add the element to watch + def watch(t: T) { + synchronized { + operations.add(t) + } + } + + // traverse the list and try to complete some watched elements + def tryCompleteWatched(): Int = { + var completed = 0 + synchronized { + val iter = operations.iterator() + while(iter.hasNext) { + val curr = iter.next + if (curr.isCompleted()) { + // another thread has completed this operation, just remove it + iter.remove() + } else { + if(curr synchronized curr.tryComplete()) { + iter.remove() + completed += 1 + } + } + } + } + completed + } + + // traverse the list and purge elements that are already completed by others + def purgeCompleted(): Int = { + var purged = 0 + synchronized { + val iter = operations.iterator() + while (iter.hasNext) { + val curr = iter.next + if(curr.isCompleted()) { + iter.remove() + purged += 1 + } + } + } + purged + } + } + + /** + * A background reaper to expire delayed operations that have timed out + */ + private class ExpiredOperationReaper extends ShutdownableThread( + "ExpirationReaper-%d".format(brokerId), + false) { + + /* The queue storing all delayed operations */ + private val delayedQueue = new DelayQueue[T] + + /* + * Return the number of delayed operations kept by the reaper + */ + def delayed() = delayedQueue.size() + + /* + * Add an operation to be expired + */ + def enqueue(t: T) { + delayedQueue.add(t) + } + + /** + * Try to get the next expired event and force completing it + */ + private def expireNext() { + val curr = delayedQueue.poll(200L, TimeUnit.MILLISECONDS) + if (curr != null.asInstanceOf[T]) { + // if there is an expired operation, try to force complete it + if (curr synchronized curr.forceComplete()) { + debug("Force complete expired delayed operation %s".format(curr)) + } + } + } + + /** + * Delete all satisfied events from the delay queue and the watcher lists + */ + private def purgeCompleted(): Int = { + var purged = 0 + + // purge the delayed queue + val iter = delayedQueue.iterator() + while (iter.hasNext) { + val curr = iter.next() + if (curr.isCompleted()) { + iter.remove() + purged += 1 + } + } + + purged + } + + override def doWork() { + // try to get the next expired operation and force completing it + expireNext() + // see if we need to purge the watch lists + if (DelayedOperationPurgatory.this.watched() >= purgeInterval) { + debug("Begin purging watch lists") + val purged = 
watchersForKey.values.map(_.purgeCompleted()).sum + debug("Purged %d elements from watch lists.".format(purged)) + } + // see if we need to purge the delayed operation queue + if (delayed() >= purgeInterval) { + debug("Begin purging delayed queue") + val purged = purgeCompleted() + debug("Purged %d operations from delayed queue.".format(purged)) + } + } + } +} diff --git a/core/src/main/scala/kafka/server/DelayedOperationKey.scala b/core/src/main/scala/kafka/server/DelayedOperationKey.scala new file mode 100644 index 0000000000000..fb7e9ed5c16dd --- /dev/null +++ b/core/src/main/scala/kafka/server/DelayedOperationKey.scala @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.server + +import kafka.common.TopicAndPartition + +/** + * Keys used for delayed operation metrics recording + */ +trait DelayedOperationKey { + def keyLabel: String +} + +object DelayedOperationKey { + val globalLabel = "All" +} + +case class TopicPartitionOperationKey(topic: String, partition: Int) extends DelayedOperationKey { + + def this(topicAndPartition: TopicAndPartition) = this(topicAndPartition.topic, topicAndPartition.partition) + + override def keyLabel = "%s-%d".format(topic, partition) +} diff --git a/core/src/main/scala/kafka/server/DelayedProduce.scala b/core/src/main/scala/kafka/server/DelayedProduce.scala new file mode 100644 index 0000000000000..c229088eb4f3d --- /dev/null +++ b/core/src/main/scala/kafka/server/DelayedProduce.scala @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
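Putting the purgatory pieces together, a caller offers an operation under one watch key per interesting partition and later pokes the purgatory with checkAndComplete when something changes for that key. A sketch using the toy DelayedWaitForCount from the earlier example and TopicPartitionOperationKey as the key; the topic name and counts are invented:

    import java.util.concurrent.atomic.AtomicInteger
    import kafka.server.{DelayedOperationPurgatory, TopicPartitionOperationKey}

    object PurgatoryExample extends App {
      // Typically one purgatory per kind of delayed operation (fetch, produce, ...).
      val purgatory = new DelayedOperationPurgatory[DelayedWaitForCount](brokerId = 0)

      val counter = new AtomicInteger(0)
      val op      = new DelayedWaitForCount(5000L, counter, 2)   // delayMs, counter, target
      val key     = TopicPartitionOperationKey("test-topic", 0)

      // Either completes op immediately or parks it on the key's watch list
      // (and in the expiration reaper's delay queue).
      val completedNow = purgatory.tryCompleteElseWatch(op, Seq(key))

      // Later, when something relevant happens for that key, re-check the watchers.
      counter.incrementAndGet()
      counter.incrementAndGet()
      val completedLater = purgatory.checkAndComplete(key)

      println(s"completed immediately: $completedNow, completed on re-check: $completedLater")
      purgatory.shutdown()
    }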
+ */ + +package kafka.server + + +import kafka.api.ProducerResponseStatus +import kafka.common.ErrorMapping +import kafka.common.TopicAndPartition + +import scala.collection._ + +case class ProducePartitionStatus(requiredOffset: Long, responseStatus: ProducerResponseStatus) { + @volatile var acksPending = false + + override def toString = "[acksPending: %b, error: %d, startOffset: %d, requiredOffset: %d]" + .format(acksPending, responseStatus.error, responseStatus.offset, requiredOffset) +} + +/** + * The produce metadata maintained by the delayed produce operation + */ +case class ProduceMetadata(produceRequiredAcks: Short, + produceStatus: Map[TopicAndPartition, ProducePartitionStatus]) { + + override def toString = "[requiredAcks: %d, partitionStatus: %s]" + .format(produceRequiredAcks, produceStatus) +} + +/** + * A delayed produce operation that can be created by the replica manager and watched + * in the produce operation purgatory + */ +class DelayedProduce(delayMs: Long, + produceMetadata: ProduceMetadata, + replicaManager: ReplicaManager, + responseCallback: Map[TopicAndPartition, ProducerResponseStatus] => Unit) + extends DelayedOperation(delayMs) { + + // first update the acks pending variable according to the error code + produceMetadata.produceStatus.foreach { case (topicAndPartition, status) => + if (status.responseStatus.error == ErrorMapping.NoError) { + // Timeout error state will be cleared when required acks are received + status.acksPending = true + status.responseStatus.error = ErrorMapping.RequestTimedOutCode + } else { + status.acksPending = false + } + + trace("Initial partition status for %s is %s".format(topicAndPartition, status)) + } + + /** + * The delayed produce operation can be completed if every partition + * it produces to is satisfied by one of the following: + * + * Case A: This broker is no longer the leader: set an error in response + * Case B: This broker is the leader: + * B.1 - If there was a local error thrown while checking if at least requiredAcks + * replicas have caught up to this operation: set an error in response + * B.2 - Otherwise, set the response with no error. + */ + override def tryComplete(): Boolean = { + // check for each partition if it still has pending acks + produceMetadata.produceStatus.foreach { case (topicAndPartition, status) => + trace("Checking produce satisfaction for %s, current status %s" + .format(topicAndPartition, status)) + // skip those partitions that have already been satisfied + if (status.acksPending) { + val partitionOpt = replicaManager.getPartition(topicAndPartition.topic, topicAndPartition.partition) + val (hasEnough, errorCode) = partitionOpt match { + case Some(partition) => + partition.checkEnoughReplicasReachOffset( + status.requiredOffset, + produceMetadata.produceRequiredAcks) + case None => + // Case A + (false, ErrorMapping.UnknownTopicOrPartitionCode) + } + if (errorCode != ErrorMapping.NoError) { + // Case B.1 + status.acksPending = false + status.responseStatus.error = errorCode + } else if (hasEnough) { + // Case B.2 + status.acksPending = false + status.responseStatus.error = ErrorMapping.NoError + } + } + } + + // check if each partition has satisfied at lease one of case A and case B + if (! 
produceMetadata.produceStatus.values.exists(p => p.acksPending)) + forceComplete() + else + false + } + + /** + * Upon completion, return the current response status along with the error code per partition + */ + override def onComplete() { + val responseStatus = produceMetadata.produceStatus.mapValues(status => status.responseStatus) + responseCallback(responseStatus) + } +} diff --git a/core/src/main/scala/kafka/server/FetchDataInfo.scala b/core/src/main/scala/kafka/server/FetchDataInfo.scala new file mode 100644 index 0000000000000..26f278f9b75b1 --- /dev/null +++ b/core/src/main/scala/kafka/server/FetchDataInfo.scala @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.server + +import kafka.message.MessageSet + +case class FetchDataInfo(fetchOffset: LogOffsetMetadata, messageSet: MessageSet) diff --git a/core/src/main/scala/kafka/server/KafkaApis.scala b/core/src/main/scala/kafka/server/KafkaApis.scala index c56ad503d83e3..2a1c0326b6e69 100644 --- a/core/src/main/scala/kafka/server/KafkaApis.scala +++ b/core/src/main/scala/kafka/server/KafkaApis.scala @@ -17,46 +17,32 @@ package kafka.server -import kafka.admin.AdminUtils import kafka.api._ -import kafka.message._ -import kafka.network._ -import kafka.log._ -import kafka.utils.ZKGroupTopicDirs -import scala.collection._ -import java.util.concurrent.TimeUnit -import java.util.concurrent.atomic._ -import kafka.metrics.KafkaMetricsGroup -import org.I0Itec.zkclient.ZkClient import kafka.common._ -import kafka.utils.{ZkUtils, Pool, SystemTime, Logging} +import kafka.log._ +import kafka.network._ +import kafka.admin.AdminUtils import kafka.network.RequestChannel.Response -import kafka.cluster.Broker import kafka.controller.KafkaController +import kafka.utils.{SystemTime, Logging} +import scala.collection._ + +import org.I0Itec.zkclient.ZkClient /** * Logic to handle the various Kafka requests */ class KafkaApis(val requestChannel: RequestChannel, val replicaManager: ReplicaManager, + val offsetManager: OffsetManager, val zkClient: ZkClient, val brokerId: Int, val config: KafkaConfig, val controller: KafkaController) extends Logging { - private val producerRequestPurgatory = - new ProducerRequestPurgatory(replicaManager.config.producerPurgatoryPurgeIntervalRequests) - private val fetchRequestPurgatory = - new FetchRequestPurgatory(requestChannel, replicaManager.config.fetchPurgatoryPurgeIntervalRequests) - private val delayedRequestMetrics = new DelayedRequestMetrics - /* following 3 data structures are updated by the update metadata request - * and is queried by the topic metadata request. 
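The per-partition bookkeeping in DelayedProduce is worth spelling out: a partition that appended cleanly starts out acksPending with a provisional RequestTimedOutCode, and tryComplete() later flips it to either a real error (cases A and B.1) or NoError (case B.2); the response goes out once no partition is still pending. A small sketch of that lifecycle on a single ProducePartitionStatus, with the offsets made up:

    import kafka.api.ProducerResponseStatus
    import kafka.common.ErrorMapping
    import kafka.server.ProducePartitionStatus

    object ProduceStatusSketch extends App {
      // Right after a clean local append: wait for replicas, with the timeout
      // error pre-filled in case the wait expires before enough acks arrive.
      val status = ProducePartitionStatus(
        requiredOffset = 100L,
        responseStatus = ProducerResponseStatus(ErrorMapping.NoError, 100L))
      status.acksPending = true
      status.responseStatus.error = ErrorMapping.RequestTimedOutCode

      // Case B.2: enough replicas reached requiredOffset, so this partition is done.
      status.acksPending = false
      status.responseStatus.error = ErrorMapping.NoError
    }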
*/ - var metadataCache: mutable.Map[TopicAndPartition, PartitionStateInfo] = - new mutable.HashMap[TopicAndPartition, PartitionStateInfo]() - private val aliveBrokers: mutable.Map[Int, Broker] = new mutable.HashMap[Int, Broker]() - private val partitionMetadataLock = new Object this.logIdent = "[KafkaApi-%d] ".format(brokerId) + val metadataCache = new MetadataCache /** * Top-level method that handles all requests and multiplexes to the right api @@ -75,6 +61,7 @@ class KafkaApis(val requestChannel: RequestChannel, case RequestKeys.ControlledShutdownKey => handleControlledShutdownRequest(request) case RequestKeys.OffsetCommitKey => handleOffsetCommitRequest(request) case RequestKeys.OffsetFetchKey => handleOffsetFetchRequest(request) + case RequestKeys.ConsumerMetadataKey => handleConsumerMetadataRequest(request) case requestId => throw new KafkaException("Unknown api code " + requestId) } } catch { @@ -85,19 +72,13 @@ class KafkaApis(val requestChannel: RequestChannel, request.apiLocalCompleteTimeMs = SystemTime.milliseconds } - // ensureTopicExists is only for client facing requests - private def ensureTopicExists(topic: String) = { - if(!metadataCache.exists { case(topicAndPartition, partitionStateInfo) => topicAndPartition.topic.equals(topic)} ) - throw new UnknownTopicOrPartitionException("Topic " + topic + " either doesn't exist or is in the process of being deleted") - } - def handleLeaderAndIsrRequest(request: RequestChannel.Request) { // ensureTopicExists is only for client facing requests // We can't have the ensureTopicExists check here since the controller sends it as an advisory to all brokers so they // stop serving data to clients for the topic being deleted val leaderAndIsrRequest = request.requestObj.asInstanceOf[LeaderAndIsrRequest] try { - val (response, error) = replicaManager.becomeLeaderOrFollower(leaderAndIsrRequest) + val (response, error) = replicaManager.becomeLeaderOrFollower(leaderAndIsrRequest, offsetManager) val leaderAndIsrResponse = new LeaderAndIsrResponse(leaderAndIsrRequest.correlationId, response, error) requestChannel.sendResponse(new Response(request, new BoundedByteBufferSend(leaderAndIsrResponse))) } catch { @@ -120,47 +101,8 @@ class KafkaApis(val requestChannel: RequestChannel, def handleUpdateMetadataRequest(request: RequestChannel.Request) { val updateMetadataRequest = request.requestObj.asInstanceOf[UpdateMetadataRequest] - // ensureTopicExists is only for client facing requests - // We can't have the ensureTopicExists check here since the controller sends it as an advisory to all brokers so they - // stop serving data to clients for the topic being deleted - val stateChangeLogger = replicaManager.stateChangeLogger - if(updateMetadataRequest.controllerEpoch < replicaManager.controllerEpoch) { - val stateControllerEpochErrorMessage = ("Broker %d received update metadata request with correlation id %d from an " + - "old controller %d with epoch %d. 
Latest known controller epoch is %d").format(brokerId, - updateMetadataRequest.correlationId, updateMetadataRequest.controllerId, updateMetadataRequest.controllerEpoch, - replicaManager.controllerEpoch) - stateChangeLogger.warn(stateControllerEpochErrorMessage) - throw new ControllerMovedException(stateControllerEpochErrorMessage) - } - partitionMetadataLock synchronized { - replicaManager.controllerEpoch = updateMetadataRequest.controllerEpoch - // cache the list of alive brokers in the cluster - updateMetadataRequest.aliveBrokers.foreach(b => aliveBrokers.put(b.id, b)) - updateMetadataRequest.partitionStateInfos.foreach { partitionState => - metadataCache.put(partitionState._1, partitionState._2) - if(stateChangeLogger.isTraceEnabled) - stateChangeLogger.trace(("Broker %d cached leader info %s for partition %s in response to UpdateMetadata request " + - "sent by controller %d epoch %d with correlation id %d").format(brokerId, partitionState._2, partitionState._1, - updateMetadataRequest.controllerId, updateMetadataRequest.controllerEpoch, updateMetadataRequest.correlationId)) - } - // remove the topics that don't exist in the UpdateMetadata request since those are the topics that are - // currently being deleted by the controller - val topicsKnownToThisBroker = metadataCache.map{ - case(topicAndPartition, partitionStateInfo) => topicAndPartition.topic }.toSet - val topicsKnownToTheController = updateMetadataRequest.partitionStateInfos.map { - case(topicAndPartition, partitionStateInfo) => topicAndPartition.topic }.toSet - val deletedTopics = topicsKnownToThisBroker -- topicsKnownToTheController - val partitionsToBeDeleted = metadataCache.filter { - case(topicAndPartition, partitionStateInfo) => deletedTopics.contains(topicAndPartition.topic) - }.keySet - partitionsToBeDeleted.foreach { partition => - metadataCache.remove(partition) - if(stateChangeLogger.isTraceEnabled) - stateChangeLogger.trace(("Broker %d deleted partition %s from metadata cache in response to UpdateMetadata request " + - "sent by controller %d epoch %d with correlation id %d").format(brokerId, partition, - updateMetadataRequest.controllerId, updateMetadataRequest.controllerEpoch, updateMetadataRequest.correlationId)) - } - } + replicaManager.maybeUpdateMetadataCache(updateMetadataRequest, metadataCache) + val updateMetadataResponse = new UpdateMetadataResponse(updateMetadataRequest.correlationId) requestChannel.sendResponse(new Response(request, new BoundedByteBufferSend(updateMetadataResponse))) } @@ -176,20 +118,37 @@ class KafkaApis(val requestChannel: RequestChannel, requestChannel.sendResponse(new Response(request, new BoundedByteBufferSend(controlledShutdownResponse))) } + /** - * Check if a partitionData from a produce request can unblock any - * DelayedFetch requests. 
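With the purgatories moved out of KafkaApis, the unblocking described in the removed comment above has to happen behind the replica manager instead: once an append grows a partition's log, the fetch purgatory should be re-checked under that partition's key. This diff does not show that wiring, so the following is only an assumed shape built from the purgatory API introduced earlier:

    import kafka.common.TopicAndPartition
    import kafka.server.{DelayedFetch, DelayedOperationPurgatory, TopicPartitionOperationKey}

    object FetchUnblockSketch {
      // Assumed shape only: the actual trigger inside ReplicaManager is not part
      // of this diff. After an append grows a partition's log, any delayed fetches
      // watching that partition are re-checked via its per-partition key.
      def onLeaderAppend(fetchPurgatory: DelayedOperationPurgatory[DelayedFetch],
                         tp: TopicAndPartition): Unit = {
        fetchPurgatory.checkAndComplete(new TopicPartitionOperationKey(tp))
      }
    }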
+ * Handle an offset commit request */ - def maybeUnblockDelayedFetchRequests(topic: String, partition: Int, messageSizeInBytes: Int) { - val satisfied = fetchRequestPurgatory.update(RequestKey(topic, partition), messageSizeInBytes) - trace("Producer request to (%s-%d) unblocked %d fetch requests.".format(topic, partition, satisfied.size)) - - // send any newly unblocked responses - for(fetchReq <- satisfied) { - val topicData = readMessageSets(fetchReq.fetch) - val response = FetchResponse(fetchReq.fetch.correlationId, topicData) - requestChannel.sendResponse(new RequestChannel.Response(fetchReq.request, new FetchResponseSend(response))) + def handleOffsetCommitRequest(request: RequestChannel.Request) { + val offsetCommitRequest = request.requestObj.asInstanceOf[OffsetCommitRequest] + + // the callback for sending the response + def sendResponseCallback(commitStatus: immutable.Map[TopicAndPartition, Short]) { + commitStatus.foreach { case (topicAndPartition, errorCode) => + // we only print warnings for known errors here; only replica manager could see an unknown + // exception while trying to write the offset message to the local log, and it will log + // an error message and write the error code in this case; hence it can be ignored here + if (errorCode != ErrorMapping.NoError && errorCode != ErrorMapping.UnknownCode) { + debug("Offset commit request with correlation id %d from client %s on partition %s failed due to %s" + .format(offsetCommitRequest.correlationId, offsetCommitRequest.clientId, + topicAndPartition, ErrorMapping.exceptionNameFor(errorCode))) + } + } + + val response = OffsetCommitResponse(commitStatus, offsetCommitRequest.correlationId) + requestChannel.sendResponse(new RequestChannel.Response(request, new BoundedByteBufferSend(response))) } + + // call offset manager to store offsets + offsetManager.storeOffsets( + offsetCommitRequest.groupId, + offsetCommitRequest.consumerId, + offsetCommitRequest.groupGenerationId, + offsetCommitRequest.requestInfo, + sendResponseCallback) } /** @@ -197,131 +156,54 @@ class KafkaApis(val requestChannel: RequestChannel, */ def handleProducerRequest(request: RequestChannel.Request) { val produceRequest = request.requestObj.asInstanceOf[ProducerRequest] - val sTime = SystemTime.milliseconds - val localProduceResults = appendToLocalLog(produceRequest) - debug("Produce to local log in %d ms".format(SystemTime.milliseconds - sTime)) - - val numPartitionsInError = localProduceResults.count(_.error.isDefined) - produceRequest.data.foreach(partitionAndData => - maybeUnblockDelayedFetchRequests(partitionAndData._1.topic, partitionAndData._1.partition, partitionAndData._2.sizeInBytes)) - - val allPartitionHaveReplicationFactorOne = - !produceRequest.data.keySet.exists( - m => replicaManager.getReplicationFactorForPartition(m.topic, m.partition) != 1) - if(produceRequest.requiredAcks == 0) { - // no operation needed if producer request.required.acks = 0; however, if there is any exception in handling the request, since - // no response is expected by the producer the handler will send a close connection response to the socket server - // to close the socket so that the producer client will know that some exception has happened and will refresh its metadata - if (numPartitionsInError != 0) { - info(("Send the close connection response due to error handling produce request " + - "[clientId = %s, correlationId = %s, topicAndPartition = %s] with Ack=0") - .format(produceRequest.clientId, produceRequest.correlationId, 
produceRequest.topicPartitionMessageSizeMap.keySet.mkString(","))) - requestChannel.closeConnection(request.processor, request) + + // the callback for sending the response + def sendResponseCallback(responseStatus: Map[TopicAndPartition, ProducerResponseStatus]) { + var errorInResponse = false + responseStatus.foreach { case (topicAndPartition, status) => + // we only print warnings for known errors here; if it is unknown, it will cause + // an error message in the replica manager + if (status.error != ErrorMapping.NoError && status.error != ErrorMapping.UnknownCode) { + debug("Produce request with correlation id %d from client %s on partition %s failed due to %s" + .format(produceRequest.correlationId, produceRequest.clientId, + topicAndPartition, ErrorMapping.exceptionNameFor(status.error))) + errorInResponse = true + } + } + + if (produceRequest.requiredAcks == 0) { + // no operation needed if producer request.required.acks = 0; however, if there is any error in handling + // the request, since no response is expected by the producer, the server will close socket server so that + // the producer client will know that some error has happened and will refresh its metadata + if (errorInResponse) { + info("Close connection due to error handling produce request with correlation id %d from client id %s with ack=0" + .format(produceRequest.correlationId, produceRequest.clientId)) + requestChannel.closeConnection(request.processor, request) + } else { + requestChannel.noOperation(request.processor, request) + } } else { - requestChannel.noOperation(request.processor, request) + val response = ProducerResponse(produceRequest.correlationId, responseStatus) + requestChannel.sendResponse(new RequestChannel.Response(request, new BoundedByteBufferSend(response))) } - } else if (produceRequest.requiredAcks == 1 || - produceRequest.numPartitions <= 0 || - allPartitionHaveReplicationFactorOne || - numPartitionsInError == produceRequest.numPartitions) { - val statuses = localProduceResults.map(r => r.key -> ProducerResponseStatus(r.errorCode, r.start)).toMap - val response = ProducerResponse(produceRequest.correlationId, statuses) - requestChannel.sendResponse(new RequestChannel.Response(request, new BoundedByteBufferSend(response))) - } else { - // create a list of (topic, partition) pairs to use as keys for this delayed request - val producerRequestKeys = produceRequest.data.keys.map( - topicAndPartition => new RequestKey(topicAndPartition)).toSeq - val statuses = localProduceResults.map(r => r.key -> ProducerResponseStatus(r.errorCode, r.end + 1)).toMap - val delayedProduce = new DelayedProduce(producerRequestKeys, - request, - statuses, - produceRequest, - produceRequest.ackTimeoutMs.toLong) - producerRequestPurgatory.watch(delayedProduce) - - /* - * Replica fetch requests may have arrived (and potentially satisfied) - * delayedProduce requests while they were being added to the purgatory. - * Here, we explicitly check if any of them can be satisfied. 
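The offset commit path above uses the same callback style: KafkaApis no longer writes offsets to ZooKeeper itself but hands the request through to the offset manager along with a callback that builds the OffsetCommitResponse. A sketch of that calling convention, reusing only the storeOffsets argument list shown above; the wrapper function and its names are invented:

    import kafka.api.{OffsetCommitRequest, OffsetCommitResponse}
    import kafka.common.TopicAndPartition
    import kafka.server.OffsetManager

    object OffsetCommitSketch {
      // Assumed shape only: mirrors the handler above, minus request-channel plumbing.
      def commit(offsetManager: OffsetManager, request: OffsetCommitRequest)
                (onDone: OffsetCommitResponse => Unit): Unit = {

        // Invoked by the offset manager once the commit outcome per partition is known.
        def callback(commitStatus: Map[TopicAndPartition, Short]): Unit =
          onDone(OffsetCommitResponse(commitStatus, request.correlationId))

        offsetManager.storeOffsets(
          request.groupId,
          request.consumerId,
          request.groupGenerationId,
          request.requestInfo,
          callback)
      }
    }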
- */ - var satisfiedProduceRequests = new mutable.ArrayBuffer[DelayedProduce] - producerRequestKeys.foreach(key => - satisfiedProduceRequests ++= - producerRequestPurgatory.update(key, key)) - debug(satisfiedProduceRequests.size + - " producer requests unblocked during produce to local log.") - satisfiedProduceRequests.foreach(_.respond()) - // we do not need the data anymore - produceRequest.emptyData() } - } - - case class ProduceResult(key: TopicAndPartition, start: Long, end: Long, error: Option[Throwable] = None) { - def this(key: TopicAndPartition, throwable: Throwable) = - this(key, -1L, -1L, Some(throwable)) - - def errorCode = error match { - case None => ErrorMapping.NoError - case Some(error) => ErrorMapping.codeFor(error.getClass.asInstanceOf[Class[Throwable]]) - } - } - /** - * Helper method for handling a parsed producer request - */ - private def appendToLocalLog(producerRequest: ProducerRequest): Iterable[ProduceResult] = { - val partitionAndData: Map[TopicAndPartition, MessageSet] = producerRequest.data - trace("Append [%s] to local log ".format(partitionAndData.toString)) - partitionAndData.map {case (topicAndPartition, messages) => - // update stats for incoming bytes rate - BrokerTopicStats.getBrokerTopicStats(topicAndPartition.topic).bytesInRate.mark(messages.sizeInBytes) - BrokerTopicStats.getBrokerAllTopicsStats.bytesInRate.mark(messages.sizeInBytes) - - try { - ensureTopicExists(topicAndPartition.topic) - val partitionOpt = replicaManager.getPartition(topicAndPartition.topic, topicAndPartition.partition) - val info = - partitionOpt match { - case Some(partition) => partition.appendMessagesToLeader(messages.asInstanceOf[ByteBufferMessageSet]) - case None => throw new UnknownTopicOrPartitionException("Partition %s doesn't exist on %d" - .format(topicAndPartition, brokerId)) - - } - val numAppendedMessages = if (info.firstOffset == -1L || info.lastOffset == -1L) 0 else (info.lastOffset - info.firstOffset + 1) - - // update stats for successfully appended messages - BrokerTopicStats.getBrokerTopicStats(topicAndPartition.topic).logBytesAppendRate.mark(messages.sizeInBytes) - BrokerTopicStats.getBrokerAllTopicsStats.logBytesAppendRate.mark(messages.sizeInBytes) - BrokerTopicStats.getBrokerTopicStats(topicAndPartition.topic).messagesInRate.mark(numAppendedMessages) - BrokerTopicStats.getBrokerAllTopicsStats.messagesInRate.mark(numAppendedMessages) - - trace("%d bytes written to log %s-%d beginning at offset %d and ending at offset %d" - .format(messages.size, topicAndPartition.topic, topicAndPartition.partition, info.firstOffset, info.lastOffset)) - ProduceResult(topicAndPartition, info.firstOffset, info.lastOffset) - } catch { - // NOTE: Failed produce requests is not incremented for UnknownTopicOrPartitionException and NotLeaderForPartitionException - // since failed produce requests metric is supposed to indicate failure of a broker in handling a produce request - // for a partition it is the leader for - case e: KafkaStorageException => - fatal("Halting due to unrecoverable I/O error while handling produce request: ", e) - Runtime.getRuntime.halt(1) - null - case utpe: UnknownTopicOrPartitionException => - warn("Produce request with correlation id %d from client %s on partition %s failed due to %s".format( - producerRequest.correlationId, producerRequest.clientId, topicAndPartition, utpe.getMessage)) - new ProduceResult(topicAndPartition, utpe) - case nle: NotLeaderForPartitionException => - warn("Produce request with correlation id %d from client %s on partition %s 
failed due to %s".format( - producerRequest.correlationId, producerRequest.clientId, topicAndPartition, nle.getMessage)) - new ProduceResult(topicAndPartition, nle) - case e: Throwable => - BrokerTopicStats.getBrokerTopicStats(topicAndPartition.topic).failedProduceRequestRate.mark() - BrokerTopicStats.getBrokerAllTopicsStats.failedProduceRequestRate.mark() - error("Error processing ProducerRequest with correlation id %d from client %s on partition %s" - .format(producerRequest.correlationId, producerRequest.clientId, topicAndPartition), e) - new ProduceResult(topicAndPartition, e) - } - } + // only allow appending to internal topic partitions + // if the client is not from admin + val internalTopicsAllowed = produceRequest.clientId == AdminUtils.AdminClientId + + // call the replica manager to append messages to the replicas + replicaManager.appendMessages( + produceRequest.ackTimeoutMs.toLong, + produceRequest.requiredAcks, + internalTopicsAllowed, + produceRequest.data, + sendResponseCallback) + + // if the request is put into the purgatory, it will have a held reference + // and hence cannot be garbage collected; hence we clear its data here in + // order to let GC re-claim its memory since it is already appended to log + produceRequest.emptyData() } /** @@ -329,130 +211,44 @@ class KafkaApis(val requestChannel: RequestChannel, */ def handleFetchRequest(request: RequestChannel.Request) { val fetchRequest = request.requestObj.asInstanceOf[FetchRequest] - if(fetchRequest.isFromFollower) { - maybeUpdatePartitionHw(fetchRequest) - // after updating HW, some delayed produce requests may be unblocked - var satisfiedProduceRequests = new mutable.ArrayBuffer[DelayedProduce] - fetchRequest.requestInfo.foreach { - case (topicAndPartition, _) => - val key = new RequestKey(topicAndPartition) - satisfiedProduceRequests ++= producerRequestPurgatory.update(key, key) - } - debug("Replica %d fetch unblocked %d producer requests." 
- .format(fetchRequest.replicaId, satisfiedProduceRequests.size)) - satisfiedProduceRequests.foreach(_.respond()) - } - val dataRead = readMessageSets(fetchRequest) - val bytesReadable = dataRead.values.map(_.messages.sizeInBytes).sum - if(fetchRequest.maxWait <= 0 || - bytesReadable >= fetchRequest.minBytes || - fetchRequest.numPartitions <= 0) { - debug("Returning fetch response %s for fetch request with correlation id %d to client %s" - .format(dataRead.values.map(_.error).mkString(","), fetchRequest.correlationId, fetchRequest.clientId)) - val response = new FetchResponse(fetchRequest.correlationId, dataRead) - requestChannel.sendResponse(new RequestChannel.Response(request, new FetchResponseSend(response))) - } else { - debug("Putting fetch request with correlation id %d from client %s into purgatory".format(fetchRequest.correlationId, - fetchRequest.clientId)) - // create a list of (topic, partition) pairs to use as keys for this delayed request - val delayedFetchKeys = fetchRequest.requestInfo.keys.toSeq.map(new RequestKey(_)) - val delayedFetch = new DelayedFetch(delayedFetchKeys, request, fetchRequest, fetchRequest.maxWait, bytesReadable) - fetchRequestPurgatory.watch(delayedFetch) - } - } + // the callback for sending the response + def sendResponseCallback(responsePartitionData: Map[TopicAndPartition, FetchResponsePartitionData]) { + responsePartitionData.foreach { case (topicAndPartition, data) => + // we only print warnings for known errors here; if it is unknown, it will cause + // an error message in the replica manager already and hence can be ignored here + if (data.error != ErrorMapping.NoError && data.error != ErrorMapping.UnknownCode) { + debug("Fetch request with correlation id %d from client %s on partition %s failed due to %s" + .format(fetchRequest.correlationId, fetchRequest.clientId, + topicAndPartition, ErrorMapping.exceptionNameFor(data.error))) + } - private def maybeUpdatePartitionHw(fetchRequest: FetchRequest) { - debug("Maybe update partition HW due to fetch request: %s ".format(fetchRequest)) - fetchRequest.requestInfo.foreach(info => { - val (topic, partition, offset) = (info._1.topic, info._1.partition, info._2.offset) - replicaManager.recordFollowerPosition(topic, partition, fetchRequest.replicaId, offset) - }) - } + // record the bytes out metrics only when the response is being sent + BrokerTopicStats.getBrokerTopicStats(topicAndPartition.topic).bytesOutRate.mark(data.messages.sizeInBytes) + BrokerTopicStats.getBrokerAllTopicsStats().bytesOutRate.mark(data.messages.sizeInBytes) + } - /** - * Read from all the offset details given and return a map of - * (topic, partition) -> PartitionData - */ - private def readMessageSets(fetchRequest: FetchRequest) = { - val isFetchFromFollower = fetchRequest.isFromFollower - fetchRequest.requestInfo.map - { - case (TopicAndPartition(topic, partition), PartitionFetchInfo(offset, fetchSize)) => - val partitionData = - try { - ensureTopicExists(topic) - val (messages, highWatermark) = readMessageSet(topic, partition, offset, fetchSize, fetchRequest.replicaId) - BrokerTopicStats.getBrokerTopicStats(topic).bytesOutRate.mark(messages.sizeInBytes) - BrokerTopicStats.getBrokerAllTopicsStats.bytesOutRate.mark(messages.sizeInBytes) - if (!isFetchFromFollower) { - new FetchResponsePartitionData(ErrorMapping.NoError, highWatermark, messages) - } else { - debug("Leader %d for partition [%s,%d] received fetch request from follower %d" - .format(brokerId, topic, partition, fetchRequest.replicaId)) - new 
FetchResponsePartitionData(ErrorMapping.NoError, highWatermark, messages) - } - } catch { - // NOTE: Failed fetch requests is not incremented for UnknownTopicOrPartitionException and NotLeaderForPartitionException - // since failed fetch requests metric is supposed to indicate failure of a broker in handling a fetch request - // for a partition it is the leader for - case utpe: UnknownTopicOrPartitionException => - warn("Fetch request with correlation id %d from client %s on partition [%s,%d] failed due to %s".format( - fetchRequest.correlationId, fetchRequest.clientId, topic, partition, utpe.getMessage)) - new FetchResponsePartitionData(ErrorMapping.codeFor(utpe.getClass.asInstanceOf[Class[Throwable]]), -1L, MessageSet.Empty) - case nle: NotLeaderForPartitionException => - warn("Fetch request with correlation id %d from client %s on partition [%s,%d] failed due to %s".format( - fetchRequest.correlationId, fetchRequest.clientId, topic, partition, nle.getMessage)) - new FetchResponsePartitionData(ErrorMapping.codeFor(nle.getClass.asInstanceOf[Class[Throwable]]), -1L, MessageSet.Empty) - case t: Throwable => - BrokerTopicStats.getBrokerTopicStats(topic).failedFetchRequestRate.mark() - BrokerTopicStats.getBrokerAllTopicsStats.failedFetchRequestRate.mark() - error("Error when processing fetch request for partition [%s,%d] offset %d from %s with correlation id %d" - .format(topic, partition, offset, if (isFetchFromFollower) "follower" else "consumer", fetchRequest.correlationId), t) - new FetchResponsePartitionData(ErrorMapping.codeFor(t.getClass.asInstanceOf[Class[Throwable]]), -1L, MessageSet.Empty) - } - (TopicAndPartition(topic, partition), partitionData) + val response = FetchResponse(fetchRequest.correlationId, responsePartitionData) + requestChannel.sendResponse(new RequestChannel.Response(request, new FetchResponseSend(response))) } - } - /** - * Read from a single topic/partition at the given offset upto maxSize bytes - */ - private def readMessageSet(topic: String, - partition: Int, - offset: Long, - maxSize: Int, - fromReplicaId: Int): (MessageSet, Long) = { - // check if the current broker is the leader for the partitions - val localReplica = if(fromReplicaId == Request.DebuggingConsumerId) - replicaManager.getReplicaOrException(topic, partition) - else - replicaManager.getLeaderReplicaIfLocal(topic, partition) - trace("Fetching log segment for topic, partition, offset, size = " + (topic, partition, offset, maxSize)) - val maxOffsetOpt = - if (Request.isReplicaIdFromFollower(fromReplicaId)) - None - else - Some(localReplica.highWatermark) - val messages = localReplica.log match { - case Some(log) => - log.read(offset, maxSize, maxOffsetOpt) - case None => - error("Leader for partition [%s,%d] on broker %d does not have a local log".format(topic, partition, brokerId)) - MessageSet.Empty - } - (messages, localReplica.highWatermark) + // call the replica manager to fetch messages from the local replica + replicaManager.fetchMessages( + fetchRequest.maxWait.toLong, + fetchRequest.replicaId, + fetchRequest.minBytes, + fetchRequest.requestInfo, + sendResponseCallback) } /** - * Service the offset request API + * Handle an offset request */ def handleOffsetRequest(request: RequestChannel.Request) { val offsetRequest = request.requestObj.asInstanceOf[OffsetRequest] val responseMap = offsetRequest.requestInfo.map(elem => { val (topicAndPartition, partitionOffsetRequestInfo) = elem try { - ensureTopicExists(topicAndPartition.topic) // ensure leader exists val localReplica = 
if(!offsetRequest.isFromDebuggingClient) replicaManager.getLeaderReplicaIfLocal(topicAndPartition.topic, topicAndPartition.partition) @@ -466,7 +262,7 @@ class KafkaApis(val requestChannel: RequestChannel, if (!offsetRequest.isFromOrdinaryClient) { allOffsets } else { - val hw = localReplica.highWatermark + val hw = localReplica.highWatermark.messageOffset if (allOffsets.exists(_ > hw)) hw +: allOffsets.dropWhile(_ > hw) else @@ -478,15 +274,15 @@ class KafkaApis(val requestChannel: RequestChannel, // NOTE: UnknownTopicOrPartitionException and NotLeaderForPartitionException are special cased since these error messages // are typically transient and there is no value in logging the entire stack trace for the same case utpe: UnknownTopicOrPartitionException => - warn("Offset request with correlation id %d from client %s on partition %s failed due to %s".format( + debug("Offset request with correlation id %d from client %s on partition %s failed due to %s".format( offsetRequest.correlationId, offsetRequest.clientId, topicAndPartition, utpe.getMessage)) (topicAndPartition, PartitionOffsetsResponse(ErrorMapping.codeFor(utpe.getClass.asInstanceOf[Class[Throwable]]), Nil) ) case nle: NotLeaderForPartitionException => - warn("Offset request with correlation id %d from client %s on partition %s failed due to %s".format( + debug("Offset request with correlation id %d from client %s on partition %s failed due to %s".format( offsetRequest.correlationId, offsetRequest.clientId, topicAndPartition,nle.getMessage)) (topicAndPartition, PartitionOffsetsResponse(ErrorMapping.codeFor(nle.getClass.asInstanceOf[Class[Throwable]]), Nil) ) case e: Throwable => - warn("Error while responding to offset request", e) + error("Error while responding to offset request", e) (topicAndPartition, PartitionOffsetsResponse(ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]]), Nil) ) } }) @@ -506,7 +302,7 @@ class KafkaApis(val requestChannel: RequestChannel, } } - def fetchOffsetsBefore(log: Log, timestamp: Long, maxNumOffsets: Int): Seq[Long] = { + private def fetchOffsetsBefore(log: Log, timestamp: Long, maxNumOffsets: Int): Seq[Long] = { val segsArray = log.logSegments.toArray var offsetTimeArray: Array[(Long, Long)] = null if(segsArray.last.size > 0) @@ -547,373 +343,98 @@ class KafkaApis(val requestChannel: RequestChannel, ret.toSeq.sortBy(- _) } - /** - * Service the topic metadata request API - */ - def handleTopicMetadataRequest(request: RequestChannel.Request) { - val metadataRequest = request.requestObj.asInstanceOf[TopicMetadataRequest] - val topicsMetadata = new mutable.ArrayBuffer[TopicMetadata]() - val config = replicaManager.config - var uniqueTopics = Set.empty[String] - uniqueTopics = { - if(metadataRequest.topics.size > 0) - metadataRequest.topics.toSet - else { - partitionMetadataLock synchronized { - metadataCache.keySet.map(_.topic) - } - } - } - val topicMetadataList = - partitionMetadataLock synchronized { - uniqueTopics.map { topic => - if(metadataCache.keySet.map(_.topic).contains(topic)) { - val partitionStateInfo = metadataCache.filter(p => p._1.topic.equals(topic)) - val sortedPartitions = partitionStateInfo.toList.sortWith((m1,m2) => m1._1.partition < m2._1.partition) - val partitionMetadata = sortedPartitions.map { case(topicAndPartition, partitionState) => - val replicas = metadataCache(topicAndPartition).allReplicas - var replicaInfo: Seq[Broker] = replicas.map(aliveBrokers.getOrElse(_, null)).filter(_ != null).toSeq - var leaderInfo: Option[Broker] = None - var isrInfo: Seq[Broker] 
= Nil - val leaderIsrAndEpoch = partitionState.leaderIsrAndControllerEpoch - val leader = leaderIsrAndEpoch.leaderAndIsr.leader - val isr = leaderIsrAndEpoch.leaderAndIsr.isr - debug("%s".format(topicAndPartition) + ";replicas = " + replicas + ", in sync replicas = " + isr + ", leader = " + leader) - try { - if(aliveBrokers.keySet.contains(leader)) - leaderInfo = Some(aliveBrokers(leader)) - else throw new LeaderNotAvailableException("Leader not available for partition %s".format(topicAndPartition)) - isrInfo = isr.map(aliveBrokers.getOrElse(_, null)).filter(_ != null) - if(replicaInfo.size < replicas.size) - throw new ReplicaNotAvailableException("Replica information not available for following brokers: " + - replicas.filterNot(replicaInfo.map(_.id).contains(_)).mkString(",")) - if(isrInfo.size < isr.size) - throw new ReplicaNotAvailableException("In Sync Replica information not available for following brokers: " + - isr.filterNot(isrInfo.map(_.id).contains(_)).mkString(",")) - new PartitionMetadata(topicAndPartition.partition, leaderInfo, replicaInfo, isrInfo, ErrorMapping.NoError) - } catch { - case e: Throwable => - error("Error while fetching metadata for partition %s".format(topicAndPartition), e) - new PartitionMetadata(topicAndPartition.partition, leaderInfo, replicaInfo, isrInfo, - ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]])) - } + private def getTopicMetadata(topics: Set[String]): Seq[TopicMetadata] = { + val topicResponses = metadataCache.getTopicMetadata(topics) + if (topics.size > 0 && topicResponses.size != topics.size) { + val nonExistentTopics = topics -- topicResponses.map(_.topic).toSet + val responsesForNonExistentTopics = nonExistentTopics.map { topic => + if (topic == OffsetManager.OffsetsTopicName || config.autoCreateTopicsEnable) { + try { + if (topic == OffsetManager.OffsetsTopicName) { + AdminUtils.createTopic(zkClient, topic, config.offsetsTopicPartitions, config.offsetsTopicReplicationFactor, + offsetManager.offsetsTopicConfig) + info("Auto creation of topic %s with %d partitions and replication factor %d is successful!" + .format(topic, config.offsetsTopicPartitions, config.offsetsTopicReplicationFactor)) } - new TopicMetadata(topic, partitionMetadata) - } else { - // topic doesn't exist, send appropriate error code - new TopicMetadata(topic, Seq.empty[PartitionMetadata], ErrorMapping.UnknownTopicOrPartitionCode) - } - } - } - - // handle auto create topics - topicMetadataList.foreach { topicMetadata => - topicMetadata.errorCode match { - case ErrorMapping.NoError => topicsMetadata += topicMetadata - case ErrorMapping.UnknownTopicOrPartitionCode => - if (config.autoCreateTopicsEnable) { - try { - AdminUtils.createTopic(zkClient, topicMetadata.topic, config.numPartitions, config.defaultReplicationFactor) + else { + AdminUtils.createTopic(zkClient, topic, config.numPartitions, config.defaultReplicationFactor) info("Auto creation of topic %s with %d partitions and replication factor %d is successful!" 
- .format(topicMetadata.topic, config.numPartitions, config.defaultReplicationFactor)) - } catch { - case e: TopicExistsException => // let it go, possibly another broker created this topic + .format(topic, config.numPartitions, config.defaultReplicationFactor)) } - topicsMetadata += new TopicMetadata(topicMetadata.topic, topicMetadata.partitionsMetadata, ErrorMapping.LeaderNotAvailableCode) - } else { - topicsMetadata += topicMetadata + } catch { + case e: TopicExistsException => // let it go, possibly another broker created this topic } - case _ => - debug("Error while fetching topic metadata for topic %s due to %s ".format(topicMetadata.topic, - ErrorMapping.exceptionFor(topicMetadata.errorCode).getClass.getName)) - topicsMetadata += topicMetadata + new TopicMetadata(topic, Seq.empty[PartitionMetadata], ErrorMapping.LeaderNotAvailableCode) + } else { + new TopicMetadata(topic, Seq.empty[PartitionMetadata], ErrorMapping.UnknownTopicOrPartitionCode) + } } + topicResponses.appendAll(responsesForNonExistentTopics) } - trace("Sending topic metadata %s for correlation id %d to client %s".format(topicsMetadata.mkString(","), metadataRequest.correlationId, metadataRequest.clientId)) - val response = new TopicMetadataResponse(topicsMetadata.toSeq, metadataRequest.correlationId) - requestChannel.sendResponse(new RequestChannel.Response(request, new BoundedByteBufferSend(response))) + topicResponses } - /* - * Service the Offset commit API + /** + * Handle a topic metadata request */ - def handleOffsetCommitRequest(request: RequestChannel.Request) { - val offsetCommitRequest = request.requestObj.asInstanceOf[OffsetCommitRequest] - val responseInfo = offsetCommitRequest.requestInfo.map{ - case (topicAndPartition, metaAndError) => { - val topicDirs = new ZKGroupTopicDirs(offsetCommitRequest.groupId, topicAndPartition.topic) - try { - ensureTopicExists(topicAndPartition.topic) - if(metaAndError.metadata != null && metaAndError.metadata.length > config.offsetMetadataMaxSize) { - (topicAndPartition, ErrorMapping.OffsetMetadataTooLargeCode) - } else { - ZkUtils.updatePersistentPath(zkClient, topicDirs.consumerOffsetDir + "/" + - topicAndPartition.partition, metaAndError.offset.toString) - (topicAndPartition, ErrorMapping.NoError) - } - } catch { - case e: Throwable => (topicAndPartition, ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]])) - } - } - } - val response = new OffsetCommitResponse(responseInfo, - offsetCommitRequest.correlationId) + def handleTopicMetadataRequest(request: RequestChannel.Request) { + val metadataRequest = request.requestObj.asInstanceOf[TopicMetadataRequest] + val topicMetadata = getTopicMetadata(metadataRequest.topics.toSet) + val brokers = metadataCache.getAliveBrokers + trace("Sending topic metadata %s and brokers %s for correlation id %d to client %s".format(topicMetadata.mkString(","), brokers.mkString(","), metadataRequest.correlationId, metadataRequest.clientId)) + val response = new TopicMetadataResponse(brokers, topicMetadata, metadataRequest.correlationId) requestChannel.sendResponse(new RequestChannel.Response(request, new BoundedByteBufferSend(response))) } /* - * Service the Offset fetch API + * Handle an offset fetch request */ def handleOffsetFetchRequest(request: RequestChannel.Request) { val offsetFetchRequest = request.requestObj.asInstanceOf[OffsetFetchRequest] - val responseInfo = offsetFetchRequest.requestInfo.map( t => { - val topicDirs = new ZKGroupTopicDirs(offsetFetchRequest.groupId, t.topic) - try { - ensureTopicExists(t.topic) - val 
payloadOpt = ZkUtils.readDataMaybeNull(zkClient, topicDirs.consumerOffsetDir + "/" + t.partition)._1 - payloadOpt match { - case Some(payload) => { - (t, OffsetMetadataAndError(offset=payload.toLong, error=ErrorMapping.NoError)) - } - case None => (t, OffsetMetadataAndError(OffsetMetadataAndError.InvalidOffset, OffsetMetadataAndError.NoMetadata, - ErrorMapping.UnknownTopicOrPartitionCode)) - } - } catch { - case e: Throwable => - (t, OffsetMetadataAndError(OffsetMetadataAndError.InvalidOffset, OffsetMetadataAndError.NoMetadata, - ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]]))) - } - }) - val response = new OffsetFetchResponse(collection.immutable.Map(responseInfo: _*), - offsetFetchRequest.correlationId) - requestChannel.sendResponse(new RequestChannel.Response(request, new BoundedByteBufferSend(response))) - } - - def close() { - debug("Shutting down.") - fetchRequestPurgatory.shutdown() - producerRequestPurgatory.shutdown() - debug("Shut down complete.") - } - - private [kafka] trait MetricKey { - def keyLabel: String - } - private [kafka] object MetricKey { - val globalLabel = "All" - } - private [kafka] case class RequestKey(topic: String, partition: Int) - extends MetricKey { + val (unknownTopicPartitions, knownTopicPartitions) = offsetFetchRequest.requestInfo.partition(topicAndPartition => + metadataCache.getPartitionInfo(topicAndPartition.topic, topicAndPartition.partition).isEmpty + ) + val unknownStatus = unknownTopicPartitions.map(topicAndPartition => (topicAndPartition, OffsetMetadataAndError.UnknownTopicOrPartition)).toMap + val knownStatus = offsetManager.getOffsets(offsetFetchRequest.groupId, knownTopicPartitions).toMap + val status = unknownStatus ++ knownStatus - def this(topicAndPartition: TopicAndPartition) = this(topicAndPartition.topic, topicAndPartition.partition) + val response = OffsetFetchResponse(status, offsetFetchRequest.correlationId) - def topicAndPartition = TopicAndPartition(topic, partition) - - override def keyLabel = "%s-%d".format(topic, partition) - } - - /** - * A delayed fetch request - */ - class DelayedFetch(keys: Seq[RequestKey], request: RequestChannel.Request, val fetch: FetchRequest, delayMs: Long, initialSize: Long) - extends DelayedRequest(keys, request, delayMs) { - val bytesAccumulated = new AtomicLong(initialSize) + trace("Sending offset fetch response %s for correlation id %d to client %s." 
+ .format(response, offsetFetchRequest.correlationId, offsetFetchRequest.clientId)) + requestChannel.sendResponse(new RequestChannel.Response(request, new BoundedByteBufferSend(response))) } - /** - * A holding pen for fetch requests waiting to be satisfied + /* + * Handle a consumer metadata request */ - class FetchRequestPurgatory(requestChannel: RequestChannel, purgeInterval: Int) - extends RequestPurgatory[DelayedFetch, Int](brokerId, purgeInterval) { - this.logIdent = "[FetchRequestPurgatory-%d] ".format(brokerId) - - /** - * A fetch request is satisfied when it has accumulated enough data to meet the min_bytes field - */ - def checkSatisfied(messageSizeInBytes: Int, delayedFetch: DelayedFetch): Boolean = { - val accumulatedSize = delayedFetch.bytesAccumulated.addAndGet(messageSizeInBytes) - accumulatedSize >= delayedFetch.fetch.minBytes - } - - /** - * When a request expires just answer it with whatever data is present - */ - def expire(delayed: DelayedFetch) { - debug("Expiring fetch request %s.".format(delayed.fetch)) - try { - val topicData = readMessageSets(delayed.fetch) - val response = FetchResponse(delayed.fetch.correlationId, topicData) - val fromFollower = delayed.fetch.isFromFollower - delayedRequestMetrics.recordDelayedFetchExpired(fromFollower) - requestChannel.sendResponse(new RequestChannel.Response(delayed.request, new FetchResponseSend(response))) - } - catch { - case e1: LeaderNotAvailableException => - debug("Leader changed before fetch request %s expired.".format(delayed.fetch)) - case e2: UnknownTopicOrPartitionException => - debug("Replica went offline before fetch request %s expired.".format(delayed.fetch)) - } - } - } + def handleConsumerMetadataRequest(request: RequestChannel.Request) { + val consumerMetadataRequest = request.requestObj.asInstanceOf[ConsumerMetadataRequest] - class DelayedProduce(keys: Seq[RequestKey], - request: RequestChannel.Request, - initialErrorsAndOffsets: Map[TopicAndPartition, ProducerResponseStatus], - val produce: ProducerRequest, - delayMs: Long) - extends DelayedRequest(keys, request, delayMs) with Logging { - - /** - * Map of (topic, partition) -> partition status - * The values in this map don't need to be synchronized since updates to the - * values are effectively synchronized by the ProducerRequestPurgatory's - * update method - */ - private [kafka] val partitionStatus = keys.map(requestKey => { - val producerResponseStatus = initialErrorsAndOffsets(TopicAndPartition(requestKey.topic, requestKey.partition)) - // if there was an error in writing to the local replica's log, then don't - // wait for acks on this partition - val (acksPending, error, nextOffset) = - if (producerResponseStatus.error == ErrorMapping.NoError) { - // Timeout error state will be cleared when requiredAcks are received - (true, ErrorMapping.RequestTimedOutCode, producerResponseStatus.offset) - } - else (false, producerResponseStatus.error, producerResponseStatus.offset) - - val initialStatus = PartitionStatus(acksPending, error, nextOffset) - trace("Initial partition status for %s = %s".format(requestKey.keyLabel, initialStatus)) - (requestKey, initialStatus) - }).toMap - - def respond() { - val finalErrorsAndOffsets = initialErrorsAndOffsets.map( - status => { - val pstat = partitionStatus(new RequestKey(status._1)) - (status._1, ProducerResponseStatus(pstat.error, pstat.requiredOffset)) - }) - - val response = ProducerResponse(produce.correlationId, finalErrorsAndOffsets) - - requestChannel.sendResponse(new RequestChannel.Response( - request, new 
BoundedByteBufferSend(response))) - } - - /** - * Returns true if this delayed produce request is satisfied (or more - * accurately, unblocked) -- this is the case if for every partition: - * Case A: This broker is not the leader: unblock - should return error. - * Case B: This broker is the leader: - * B.1 - If there was a localError (when writing to the local log): unblock - should return error - * B.2 - else, at least requiredAcks replicas should be caught up to this request. - * - * As partitions become acknowledged, we may be able to unblock - * DelayedFetchRequests that are pending on those partitions. - */ - def isSatisfied(followerFetchRequestKey: RequestKey) = { - val topic = followerFetchRequestKey.topic - val partitionId = followerFetchRequestKey.partition - val key = RequestKey(topic, partitionId) - val fetchPartitionStatus = partitionStatus(key) - trace("Checking producer request satisfaction for %s-%d, acksPending = %b" - .format(topic, partitionId, fetchPartitionStatus.acksPending)) - if (fetchPartitionStatus.acksPending) { - val partitionOpt = replicaManager.getPartition(topic, partitionId) - val (hasEnough, errorCode) = partitionOpt match { - case Some(partition) => - partition.checkEnoughReplicasReachOffset(fetchPartitionStatus.requiredOffset, produce.requiredAcks) - case None => - (false, ErrorMapping.UnknownTopicOrPartitionCode) - } - if (errorCode != ErrorMapping.NoError) { - fetchPartitionStatus.acksPending = false - fetchPartitionStatus.error = errorCode - } else if (hasEnough) { - fetchPartitionStatus.acksPending = false - fetchPartitionStatus.error = ErrorMapping.NoError - } - if (!fetchPartitionStatus.acksPending) { - val messageSizeInBytes = produce.topicPartitionMessageSizeMap(followerFetchRequestKey.topicAndPartition) - maybeUnblockDelayedFetchRequests(topic, partitionId, messageSizeInBytes) - } - } + val partition = offsetManager.partitionFor(consumerMetadataRequest.group) - // unblocked if there are no partitions with pending acks - val satisfied = ! partitionStatus.exists(p => p._2.acksPending) - trace("Producer request satisfaction for %s-%d = %b".format(topic, partitionId, satisfied)) - satisfied - } + // get metadata (and create the topic if necessary) + val offsetsTopicMetadata = getTopicMetadata(Set(OffsetManager.OffsetsTopicName)).head - case class PartitionStatus(var acksPending: Boolean, - var error: Short, - requiredOffset: Long) { - def setThisBrokerNotLeader() { - error = ErrorMapping.NotLeaderForPartitionCode - acksPending = false - } + val errorResponse = ConsumerMetadataResponse(None, ErrorMapping.ConsumerCoordinatorNotAvailableCode, consumerMetadataRequest.correlationId) - override def toString = - "acksPending:%b, error: %d, requiredOffset: %d".format( - acksPending, error, requiredOffset - ) - } - } + val response = + offsetsTopicMetadata.partitionsMetadata.find(_.partitionId == partition).map { partitionMetadata => + partitionMetadata.leader.map { leader => + ConsumerMetadataResponse(Some(leader), ErrorMapping.NoError, consumerMetadataRequest.correlationId) + }.getOrElse(errorResponse) + }.getOrElse(errorResponse) - /** - * A holding pen for produce requests waiting to be satisfied. 
- */ - private [kafka] class ProducerRequestPurgatory(purgeInterval: Int) - extends RequestPurgatory[DelayedProduce, RequestKey](brokerId, purgeInterval) { - this.logIdent = "[ProducerRequestPurgatory-%d] ".format(brokerId) - - protected def checkSatisfied(followerFetchRequestKey: RequestKey, - delayedProduce: DelayedProduce) = - delayedProduce.isSatisfied(followerFetchRequestKey) - - /** - * Handle an expired delayed request - */ - protected def expire(delayedProduce: DelayedProduce) { - for (partitionStatus <- delayedProduce.partitionStatus if partitionStatus._2.acksPending) - delayedRequestMetrics.recordDelayedProducerKeyExpired(partitionStatus._1) - - delayedProduce.respond() - } + trace("Sending consumer metadata %s for correlation id %d to client %s." + .format(response, consumerMetadataRequest.correlationId, consumerMetadataRequest.clientId)) + requestChannel.sendResponse(new RequestChannel.Response(request, new BoundedByteBufferSend(response))) } - private class DelayedRequestMetrics { - private class DelayedProducerRequestMetrics(keyLabel: String = MetricKey.globalLabel) extends KafkaMetricsGroup { - val expiredRequestMeter = newMeter(keyLabel + "ExpiresPerSecond", "requests", TimeUnit.SECONDS) - } - - - private class DelayedFetchRequestMetrics(forFollower: Boolean) extends KafkaMetricsGroup { - private val metricPrefix = if (forFollower) "Follower" else "Consumer" - - val expiredRequestMeter = newMeter(metricPrefix + "ExpiresPerSecond", "requests", TimeUnit.SECONDS) - } - - private val producerRequestMetricsForKey = { - val valueFactory = (k: MetricKey) => new DelayedProducerRequestMetrics(k.keyLabel + "-") - new Pool[MetricKey, DelayedProducerRequestMetrics](Some(valueFactory)) - } - - private val aggregateProduceRequestMetrics = new DelayedProducerRequestMetrics - - private val aggregateFollowerFetchRequestMetrics = new DelayedFetchRequestMetrics(forFollower = true) - private val aggregateNonFollowerFetchRequestMetrics = new DelayedFetchRequestMetrics(forFollower = false) - - def recordDelayedProducerKeyExpired(key: MetricKey) { - val keyMetrics = producerRequestMetricsForKey.getAndMaybePut(key) - List(keyMetrics, aggregateProduceRequestMetrics).foreach(_.expiredRequestMeter.mark()) - } - - def recordDelayedFetchExpired(forFollower: Boolean) { - val metrics = if (forFollower) aggregateFollowerFetchRequestMetrics - else aggregateNonFollowerFetchRequestMetrics - - metrics.expiredRequestMeter.mark() - } + def close() { + // TODO currently closing the API is an no-op since the API no longer maintain any modules + // maybe removing the closing call in the end when KafkaAPI becomes a pure stateless layer + debug("Shut down complete.") } } diff --git a/core/src/main/scala/kafka/server/KafkaConfig.scala b/core/src/main/scala/kafka/server/KafkaConfig.scala index 3c3aafc2b3f06..6e26c5436feb4 100644 --- a/core/src/main/scala/kafka/server/KafkaConfig.scala +++ b/core/src/main/scala/kafka/server/KafkaConfig.scala @@ -5,7 +5,7 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -31,53 +31,79 @@ class KafkaConfig private (val props: VerifiableProperties) extends ZKConfig(pro this(new VerifiableProperties(originalProps)) props.verify() } - + private def getLogRetentionTimeMillis(): Long = { - var millisInMinute = 60L * 1000L + val millisInMinute = 60L * 1000L val millisInHour = 60L * millisInMinute - if(props.containsKey("log.retention.minutes")){ + + if(props.containsKey("log.retention.ms")){ + props.getIntInRange("log.retention.ms", (1, Int.MaxValue)) + } + else if(props.containsKey("log.retention.minutes")){ millisInMinute * props.getIntInRange("log.retention.minutes", (1, Int.MaxValue)) - } else { + } + else { millisInHour * props.getIntInRange("log.retention.hours", 24*7, (1, Int.MaxValue)) } + } + + private def getLogRollTimeMillis(): Long = { + val millisInHour = 60L * 60L * 1000L + if(props.containsKey("log.roll.ms")){ + props.getIntInRange("log.roll.ms", (1, Int.MaxValue)) + } + else { + millisInHour * props.getIntInRange("log.roll.hours", 24*7, (1, Int.MaxValue)) + } + } + + private def getLogRollTimeJitterMillis(): Long = { + val millisInHour = 60L * 60L * 1000L + + if(props.containsKey("log.roll.jitter.ms")) { + props.getIntInRange("log.roll.jitter.ms", (0, Int.MaxValue)) + } + else { + millisInHour * props.getIntInRange("log.roll.jitter.hours", 0, (0, Int.MaxValue)) + } } /*********** General Configuration ***********/ - + /* the broker id for this server */ val brokerId: Int = props.getIntInRange("broker.id", (0, Int.MaxValue)) /* the maximum size of message that the server can receive */ val messageMaxBytes = props.getIntInRange("message.max.bytes", 1000000 + MessageSet.LogOverhead, (0, Int.MaxValue)) - + /* the number of network threads that the server uses for handling network requests */ val numNetworkThreads = props.getIntInRange("num.network.threads", 3, (1, Int.MaxValue)) /* the number of io threads that the server uses for carrying out network requests */ val numIoThreads = props.getIntInRange("num.io.threads", 8, (1, Int.MaxValue)) - + /* the number of threads to use for various background processing tasks */ - val backgroundThreads = props.getIntInRange("background.threads", 4, (1, Int.MaxValue)) - + val backgroundThreads = props.getIntInRange("background.threads", 10, (1, Int.MaxValue)) + /* the number of queued requests allowed before blocking the network threads */ val queuedMaxRequests = props.getIntInRange("queued.max.requests", 500, (1, Int.MaxValue)) - + /*********** Socket Server Configuration ***********/ - + /* the port to listen and accept connections on */ val port: Int = props.getInt("port", 6667) /* hostname of broker. If this is set, it will only bind to this address. If this is not set, * it will bind to all interfaces */ val hostName: String = props.getString("host.name", null) - + /* hostname to publish to ZooKeeper for clients to use. In IaaS environments, this may * need to be different from the interface to which the broker binds. If this is not set, * it will use the value for "host.name" if configured. Otherwise * it will use the value returned from java.net.InetAddress.getCanonicalHostName(). */ val advertisedHostName: String = props.getString("advertised.host.name", hostName) - + /* the port to publish to ZooKeeper for clients to use. In IaaS environments, this may * need to be different from the port to which the broker binds. 
If this is not set, * it will publish the same port that the broker binds to. */ @@ -85,27 +111,39 @@ class KafkaConfig private (val props: VerifiableProperties) extends ZKConfig(pro /* the SO_SNDBUFF buffer of the socket sever sockets */ val socketSendBufferBytes: Int = props.getInt("socket.send.buffer.bytes", 100*1024) - + /* the SO_RCVBUFF buffer of the socket sever sockets */ val socketReceiveBufferBytes: Int = props.getInt("socket.receive.buffer.bytes", 100*1024) - + /* the maximum number of bytes in a socket request */ val socketRequestMaxBytes: Int = props.getIntInRange("socket.request.max.bytes", 100*1024*1024, (1, Int.MaxValue)) + /* the maximum number of connections we allow from each ip address */ + val maxConnectionsPerIp: Int = props.getIntInRange("max.connections.per.ip", Int.MaxValue, (1, Int.MaxValue)) + + /* per-ip or hostname overrides to the default maximum number of connections */ + val maxConnectionsPerIpOverrides = props.getMap("max.connections.per.ip.overrides").map(entry => (entry._1, entry._2.toInt)) + + /* idle connections timeout: the server socket processor threads close the connections that idle more than this */ + val connectionsMaxIdleMs = props.getLong("connections.max.idle.ms", 10*60*1000L) + /*********** Log Configuration ***********/ /* the default number of log partitions per topic */ val numPartitions = props.getIntInRange("num.partitions", 1, (1, Int.MaxValue)) - + /* the directories in which the log data is kept */ val logDirs = Utils.parseCsvList(props.getString("log.dirs", props.getString("log.dir", "/tmp/kafka-logs"))) require(logDirs.size > 0) - + /* the maximum size of a single log file */ val logSegmentBytes = props.getIntInRange("log.segment.bytes", 1*1024*1024*1024, (Message.MinHeaderSize, Int.MaxValue)) /* the maximum time before a new log segment is rolled out */ - val logRollHours = props.getIntInRange("log.roll.hours", 24*7, (1, Int.MaxValue)) + val logRollTimeMillis = getLogRollTimeMillis + + /* the maximum jitter to subtract from logRollTimeMillis */ + val logRollTimeJitterMillis = getLogRollTimeJitterMillis /* the number of hours to keep a log file before deleting it */ val logRetentionTimeMillis = getLogRetentionTimeMillis @@ -115,42 +153,42 @@ class KafkaConfig private (val props: VerifiableProperties) extends ZKConfig(pro /* the frequency in minutes that the log cleaner checks whether any log is eligible for deletion */ val logCleanupIntervalMs = props.getLongInRange("log.retention.check.interval.ms", 5*60*1000, (1, Long.MaxValue)) - - /* the default cleanup policy for segments beyond the retention window, must be either "delete" or "dedupe" */ + + /* the default cleanup policy for segments beyond the retention window, must be either "delete" or "compact" */ val logCleanupPolicy = props.getString("log.cleanup.policy", "delete") - + /* the number of background threads to use for log cleaning */ val logCleanerThreads = props.getIntInRange("log.cleaner.threads", 1, (0, Int.MaxValue)) - + /* the log cleaner will be throttled so that the sum of its read and write i/o will be less than this value on average */ val logCleanerIoMaxBytesPerSecond = props.getDouble("log.cleaner.io.max.bytes.per.second", Double.MaxValue) - + /* the total memory used for log deduplication across all cleaner threads */ val logCleanerDedupeBufferSize = props.getLongInRange("log.cleaner.dedupe.buffer.size", 500*1024*1024L, (0, Long.MaxValue)) require(logCleanerDedupeBufferSize / logCleanerThreads > 1024*1024, "log.cleaner.dedupe.buffer.size must be at least 1MB per 
cleaner thread.") - + /* the total memory used for log cleaner I/O buffers across all cleaner threads */ val logCleanerIoBufferSize = props.getIntInRange("log.cleaner.io.buffer.size", 512*1024, (0, Int.MaxValue)) - + /* log cleaner dedupe buffer load factor. The percentage full the dedupe buffer can become. A higher value * will allow more log to be cleaned at once but will lead to more hash collisions */ val logCleanerDedupeBufferLoadFactor = props.getDouble("log.cleaner.io.buffer.load.factor", 0.9d) - + /* the amount of time to sleep when there are no logs to clean */ - val logCleanerBackoffMs = props.getLongInRange("log.cleaner.backoff.ms", 30*1000, (0L, Long.MaxValue)) - + val logCleanerBackoffMs = props.getLongInRange("log.cleaner.backoff.ms", 15*1000, (0L, Long.MaxValue)) + /* the minimum ratio of dirty log to total log for a log to eligible for cleaning */ val logCleanerMinCleanRatio = props.getDouble("log.cleaner.min.cleanable.ratio", 0.5) - + /* should we enable log cleaning? */ val logCleanerEnable = props.getBoolean("log.cleaner.enable", false) - + /* how long are delete records retained? */ val logCleanerDeleteRetentionMs = props.getLong("log.cleaner.delete.retention.ms", 24 * 60 * 60 * 1000L) - + /* the maximum size in bytes of the offset index */ val logIndexSizeMaxBytes = props.getIntInRange("log.index.size.max.bytes", 10*1024*1024, (4, Int.MaxValue)) - + /* the interval with which we add an entry to the offset index */ val logIndexIntervalBytes = props.getIntInRange("log.index.interval.bytes", 4096, (0, Int.MaxValue)) @@ -165,20 +203,28 @@ class KafkaConfig private (val props: VerifiableProperties) extends ZKConfig(pro /* the maximum time in ms that a message in any topic is kept in memory before flushed to disk */ val logFlushIntervalMs = props.getLong("log.flush.interval.ms", logFlushSchedulerIntervalMs) - + /* the frequency with which we update the persistent record of the last flush which acts as the log recovery point */ val logFlushOffsetCheckpointIntervalMs = props.getIntInRange("log.flush.offset.checkpoint.interval.ms", 60000, (0, Int.MaxValue)) + /* the number of threads per data directory to be used for log recovery at startup and flushing at shutdown */ + val numRecoveryThreadsPerDataDir = props.getIntInRange("num.recovery.threads.per.data.dir", 1, (1, Int.MaxValue)) + /* enable auto creation of topic on the server */ val autoCreateTopicsEnable = props.getBoolean("auto.create.topics.enable", true) + /* define the minimum number of replicas in ISR needed to satisfy a produce request with required.acks=-1 (or all) */ + val minInSyncReplicas = props.getIntInRange("min.insync.replicas",1,(1,Int.MaxValue)) + + + /*********** Replication configuration ***********/ /* the socket timeout for controller-to-broker channels */ val controllerSocketTimeoutMs = props.getInt("controller.socket.timeout.ms", 30000) /* the buffer size for controller-to-broker-channels */ - val controllerMessageQueueSize= props.getInt("controller.message.queue.size", 10) + val controllerMessageQueueSize= props.getInt("controller.message.queue.size", Int.MaxValue) /* default replication factors for automatically created topics */ val defaultReplicationFactor = props.getInt("default.replication.factor", 1) @@ -189,8 +235,10 @@ class KafkaConfig private (val props: VerifiableProperties) extends ZKConfig(pro /* If the lag in messages between a leader and a follower exceeds this number, the leader will remove the follower from isr */ val replicaLagMaxMessages = props.getLong("replica.lag.max.messages", 
4000) - /* the socket timeout for network requests */ + /* the socket timeout for network requests. Its value should be at least replica.fetch.wait.max.ms. */ val replicaSocketTimeoutMs = props.getInt("replica.socket.timeout.ms", ConsumerConfig.SocketTimeout) + require(replicaFetchWaitMaxMs <= replicaSocketTimeoutMs, "replica.socket.timeout.ms should always be at least replica.fetch.wait.max.ms" + + " to prevent unnecessary socket timeouts") /* the socket receive buffer for network requests */ val replicaSocketReceiveBufferBytes = props.getInt("replica.socket.receive.buffer.bytes", ConsumerConfig.SocketBufferSize) @@ -210,19 +258,19 @@ class KafkaConfig private (val props: VerifiableProperties) extends ZKConfig(pro /* number of fetcher threads used to replicate messages from a source broker. * Increasing this value can increase the degree of I/O parallelism in the follower broker. */ val numReplicaFetchers = props.getInt("num.replica.fetchers", 1) - + /* the frequency with which the high watermark is saved out to disk */ val replicaHighWatermarkCheckpointIntervalMs = props.getLong("replica.high.watermark.checkpoint.interval.ms", 5000L) /* the purge interval (in number of requests) of the fetch request purgatory */ - val fetchPurgatoryPurgeIntervalRequests = props.getInt("fetch.purgatory.purge.interval.requests", 10000) + val fetchPurgatoryPurgeIntervalRequests = props.getInt("fetch.purgatory.purge.interval.requests", 1000) /* the purge interval (in number of requests) of the producer request purgatory */ - val producerPurgatoryPurgeIntervalRequests = props.getInt("producer.purgatory.purge.interval.requests", 10000) + val producerPurgatoryPurgeIntervalRequests = props.getInt("producer.purgatory.purge.interval.requests", 1000) /* Enables auto leader balancing. A background thread checks and triggers leader * balance if required at regular intervals */ - val autoLeaderRebalanceEnable = props.getBoolean("auto.leader.rebalance.enable", false) + val autoLeaderRebalanceEnable = props.getBoolean("auto.leader.rebalance.enable", true) /* the ratio of leader imbalance allowed per broker. The controller would trigger a leader balance if it goes above * this value per broker. The value is specified in percentage. 
*/ @@ -231,6 +279,9 @@ class KafkaConfig private (val props: VerifiableProperties) extends ZKConfig(pro /* the frequency with which the partition rebalance check is triggered by the controller */ val leaderImbalanceCheckIntervalSeconds = props.getInt("leader.imbalance.check.interval.seconds", 300) + /* indicates whether to enable replicas not in the ISR set to be elected as leader as a last resort, even though + * doing so may result in data loss */ + val uncleanLeaderElectionEnable = props.getBoolean("unclean.leader.election.enable", true) /*********** Controlled shutdown configuration ***********/ @@ -242,10 +293,50 @@ class KafkaConfig private (val props: VerifiableProperties) extends ZKConfig(pro val controlledShutdownRetryBackoffMs = props.getInt("controlled.shutdown.retry.backoff.ms", 5000) /* enable controlled shutdown of the server */ - val controlledShutdownEnable = props.getBoolean("controlled.shutdown.enable", false) + val controlledShutdownEnable = props.getBoolean("controlled.shutdown.enable", default = true) + + /*********** Offset management configuration ***********/ - /*********** Misc configuration ***********/ - /* the maximum size for a metadata entry associated with an offset commit */ - val offsetMetadataMaxSize = props.getInt("offset.metadata.max.bytes", 1024) + val offsetMetadataMaxSize = props.getInt("offset.metadata.max.bytes", OffsetManagerConfig.DefaultMaxMetadataSize) + + /** Batch size for reading from the offsets segments when loading offsets into the cache. */ + val offsetsLoadBufferSize = props.getIntInRange("offsets.load.buffer.size", + OffsetManagerConfig.DefaultLoadBufferSize, (1, Integer.MAX_VALUE)) + + /** The replication factor for the offset commit topic (set higher to ensure availability). */ + val offsetsTopicReplicationFactor: Short = props.getShortInRange("offsets.topic.replication.factor", + OffsetManagerConfig.DefaultOffsetsTopicReplicationFactor, (1, Short.MaxValue)) + + /** The number of partitions for the offset commit topic (should not change after deployment). */ + val offsetsTopicPartitions: Int = props.getIntInRange("offsets.topic.num.partitions", + OffsetManagerConfig.DefaultOffsetsTopicNumPartitions, (1, Integer.MAX_VALUE)) + + /** The offsets topic segment bytes should be kept relatively small in order to facilitate faster log compaction and cache loads */ + val offsetsTopicSegmentBytes: Int = props.getIntInRange("offsets.topic.segment.bytes", + OffsetManagerConfig.DefaultOffsetsTopicSegmentBytes, (1, Integer.MAX_VALUE)) + + /** Compression codec for the offsets topic - compression may be used to achieve "atomic" commits. */ + val offsetsTopicCompressionCodec = props.getCompressionCodec("offsets.topic.compression.codec", + OffsetManagerConfig.DefaultOffsetsTopicCompressionCodec) + + /** Offsets older than this retention period will be discarded. */ + val offsetsRetentionMinutes: Int = props.getIntInRange("offsets.retention.minutes", 24*60, (1, Integer.MAX_VALUE)) + + /** Frequency at which to check for stale offsets. */ + val offsetsRetentionCheckIntervalMs: Long = props.getLongInRange("offsets.retention.check.interval.ms", + OffsetManagerConfig.DefaultOffsetsRetentionCheckIntervalMs, (1, Long.MaxValue)) + + /* Offset commit will be delayed until all replicas for the offsets topic receive the commit or this timeout is + * reached. This is similar to the producer request timeout. 
*/ + val offsetCommitTimeoutMs = props.getIntInRange("offsets.commit.timeout.ms", + OffsetManagerConfig.DefaultOffsetCommitTimeoutMs, (1, Integer.MAX_VALUE)) + + /** The required acks before the commit can be accepted. In general, the default (-1) should not be overridden. */ + val offsetCommitRequiredAcks = props.getShortInRange("offsets.commit.required.acks", + OffsetManagerConfig.DefaultOffsetCommitRequiredAcks, (-1, offsetsTopicReplicationFactor)) + + /* Enables delete topic. Delete topic through the admin tool will have no effect if this config is turned off */ + val deleteTopicEnable = props.getBoolean("delete.topic.enable", false) + } diff --git a/core/src/main/scala/kafka/server/KafkaHealthcheck.scala b/core/src/main/scala/kafka/server/KafkaHealthcheck.scala index 8c69d095bfa9f..4acdd70fe9c1e 100644 --- a/core/src/main/scala/kafka/server/KafkaHealthcheck.scala +++ b/core/src/main/scala/kafka/server/KafkaHealthcheck.scala @@ -38,12 +38,18 @@ class KafkaHealthcheck(private val brokerId: Int, private val zkClient: ZkClient) extends Logging { val brokerIdPath = ZkUtils.BrokerIdsPath + "/" + brokerId + val sessionExpireListener = new SessionExpireListener def startup() { - zkClient.subscribeStateChanges(new SessionExpireListener) + zkClient.subscribeStateChanges(sessionExpireListener) register() } + def shutdown() { + zkClient.unsubscribeStateChanges(sessionExpireListener) + ZkUtils.deregisterBrokerInZk(zkClient, brokerId) + } + /** * Register this broker as "alive" in zookeeper */ diff --git a/core/src/main/scala/kafka/server/KafkaRequestHandler.scala b/core/src/main/scala/kafka/server/KafkaRequestHandler.scala index 871212bf9110c..e4053fbe8ef78 100644 --- a/core/src/main/scala/kafka/server/KafkaRequestHandler.scala +++ b/core/src/main/scala/kafka/server/KafkaRequestHandler.scala @@ -21,17 +21,34 @@ import kafka.network._ import kafka.utils._ import kafka.metrics.KafkaMetricsGroup import java.util.concurrent.TimeUnit +import com.yammer.metrics.core.Meter /** * A thread that answers kafka requests. */ -class KafkaRequestHandler(id: Int, brokerId: Int, val requestChannel: RequestChannel, apis: KafkaApis) extends Runnable with Logging { +class KafkaRequestHandler(id: Int, + brokerId: Int, + val aggregateIdleMeter: Meter, + val totalHandlerThreads: Int, + val requestChannel: RequestChannel, + apis: KafkaApis) extends Runnable with Logging { this.logIdent = "[Kafka Request Handler " + id + " on Broker " + brokerId + "], " - def run() { + def run() { while(true) { try { - val req = requestChannel.receiveRequest() + var req : RequestChannel.Request = null + while (req == null) { + // We use a single meter for aggregate idle percentage for the thread pool. + // Since meter is calculated as total_recorded_value / time_window and + // time_window is independent of the number of threads, each recorded idle + // time should be discounted by # threads. 
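The added comment above explains why each request-handler thread divides its measured idle time by the number of threads before recording it: the meter's time window is shared by the whole pool, so only the discounted sum reads as an average idle percentage across handlers. As a rough standalone sketch of that bookkeeping (not part of the patch; it assumes a plain AtomicLong accumulator in place of the Yammer Meter, and the name PoolIdleTracker is hypothetical):

```scala
import java.util.concurrent.atomic.AtomicLong

// Standalone sketch, not the patched classes: a shared idle-time accumulator for a
// pool of `totalThreads` request-handler threads. Each thread records its own idle
// time divided by the thread count, so the shared total divided by elapsed wall-clock
// time approximates the pool's average idle fraction (what the patched
// "RequestHandlerAvgIdlePercent" meter is intended to report).
class PoolIdleTracker(totalThreads: Int) {
  private val idleNanos = new AtomicLong(0L)
  private val windowStart = System.nanoTime()

  // Called by a handler thread after it waited `waitedNanos` for a request to arrive.
  def recordIdle(waitedNanos: Long): Unit = {
    idleNanos.addAndGet(waitedNanos / totalThreads)
  }

  // Average idle fraction of the whole pool since the window started (roughly 0.0 to 1.0):
  // if every thread is idle for the full window, the discounted sum equals the window length.
  def avgIdleRatio: Double =
    idleNanos.get.toDouble / (System.nanoTime() - windowStart)
}
```

A handler thread would wrap its blocking poll with `val start = System.nanoTime()` and then call `recordIdle(System.nanoTime() - start)`; the patch does the equivalent with `aggregateIdleMeter.mark(idleTime / totalHandlerThreads)` around the `receiveRequest(300)` call in the lines that follow.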
+ val startSelectTime = SystemTime.nanoseconds + req = requestChannel.receiveRequest(300) + val idleTime = SystemTime.nanoseconds - startSelectTime + aggregateIdleMeter.mark(idleTime / totalHandlerThreads) + } + if(req eq RequestChannel.AllDone) { debug("Kafka request handler %d on broker %d received shut down command".format( id, brokerId)) @@ -52,12 +69,16 @@ class KafkaRequestHandler(id: Int, brokerId: Int, val requestChannel: RequestCha class KafkaRequestHandlerPool(val brokerId: Int, val requestChannel: RequestChannel, val apis: KafkaApis, - numThreads: Int) extends Logging { + numThreads: Int) extends Logging with KafkaMetricsGroup { + + /* a meter to track the average free capacity of the request handlers */ + private val aggregateIdleMeter = newMeter("RequestHandlerAvgIdlePercent", "percent", TimeUnit.NANOSECONDS) + this.logIdent = "[Kafka Request Handler on Broker " + brokerId + "], " val threads = new Array[Thread](numThreads) val runnables = new Array[KafkaRequestHandler](numThreads) for(i <- 0 until numThreads) { - runnables(i) = new KafkaRequestHandler(i, brokerId, requestChannel, apis) + runnables(i) = new KafkaRequestHandler(i, brokerId, aggregateIdleMeter, numThreads, requestChannel, apis) threads(i) = Utils.daemonThread("kafka-request-handler-" + i, runnables(i)) threads(i).start() } @@ -72,23 +93,28 @@ class KafkaRequestHandlerPool(val brokerId: Int, } } -class BrokerTopicMetrics(name: String) extends KafkaMetricsGroup { - val messagesInRate = newMeter(name + "MessagesInPerSec", "messages", TimeUnit.SECONDS) - val bytesInRate = newMeter(name + "BytesInPerSec", "bytes", TimeUnit.SECONDS) - val bytesOutRate = newMeter(name + "BytesOutPerSec", "bytes", TimeUnit.SECONDS) - val logBytesAppendRate = newMeter(name + "LogBytesAppendedPerSec", "bytes", TimeUnit.SECONDS) - val failedProduceRequestRate = newMeter(name + "FailedProduceRequestsPerSec", "requests", TimeUnit.SECONDS) - val failedFetchRequestRate = newMeter(name + "FailedFetchRequestsPerSec", "requests", TimeUnit.SECONDS) +class BrokerTopicMetrics(name: Option[String]) extends KafkaMetricsGroup { + val tags: scala.collection.Map[String, String] = name match { + case None => scala.collection.Map.empty + case Some(topic) => Map("topic" -> topic) + } + + val messagesInRate = newMeter("MessagesInPerSec", "messages", TimeUnit.SECONDS, tags) + val bytesInRate = newMeter("BytesInPerSec", "bytes", TimeUnit.SECONDS, tags) + val bytesOutRate = newMeter("BytesOutPerSec", "bytes", TimeUnit.SECONDS, tags) + val bytesRejectedRate = newMeter("BytesRejectedPerSec", "bytes", TimeUnit.SECONDS, tags) + val failedProduceRequestRate = newMeter("FailedProduceRequestsPerSec", "requests", TimeUnit.SECONDS, tags) + val failedFetchRequestRate = newMeter("FailedFetchRequestsPerSec", "requests", TimeUnit.SECONDS, tags) } object BrokerTopicStats extends Logging { - private val valueFactory = (k: String) => new BrokerTopicMetrics(k) + private val valueFactory = (k: String) => new BrokerTopicMetrics(Some(k)) private val stats = new Pool[String, BrokerTopicMetrics](Some(valueFactory)) - private val allTopicsStats = new BrokerTopicMetrics("AllTopics") + private val allTopicsStats = new BrokerTopicMetrics(None) def getBrokerAllTopicsStats(): BrokerTopicMetrics = allTopicsStats def getBrokerTopicStats(topic: String): BrokerTopicMetrics = { - stats.getAndMaybePut(topic + "-") + stats.getAndMaybePut(topic) } } diff --git a/core/src/main/scala/kafka/server/KafkaServer.scala b/core/src/main/scala/kafka/server/KafkaServer.scala index 
5e34f95e64eaf..1bf7d10cef23a 100644 --- a/core/src/main/scala/kafka/server/KafkaServer.scala +++ b/core/src/main/scala/kafka/server/KafkaServer.scala @@ -31,20 +31,24 @@ import kafka.cluster.Broker import kafka.api.{ControlledShutdownResponse, ControlledShutdownRequest} import kafka.common.ErrorMapping import kafka.network.{Receive, BlockingChannel, SocketServer} +import kafka.metrics.KafkaMetricsGroup +import com.yammer.metrics.core.Gauge /** * Represents the lifecycle of a single Kafka broker. Handles all functionality required * to start up and shutdown a single Kafka node. */ -class KafkaServer(val config: KafkaConfig, time: Time = SystemTime) extends Logging { +class KafkaServer(val config: KafkaConfig, time: Time = SystemTime) extends Logging with KafkaMetricsGroup { this.logIdent = "[Kafka Server " + config.brokerId + "], " private var isShuttingDown = new AtomicBoolean(false) private var shutdownLatch = new CountDownLatch(1) - private var startupComplete = new AtomicBoolean(false); + private var startupComplete = new AtomicBoolean(false) + val brokerState: BrokerState = new BrokerState val correlationId: AtomicInteger = new AtomicInteger(0) var socketServer: SocketServer = null var requestHandlerPool: KafkaRequestHandlerPool = null var logManager: LogManager = null + var offsetManager: OffsetManager = null var kafkaHealthcheck: KafkaHealthcheck = null var topicConfigManager: TopicConfigManager = null var replicaManager: ReplicaManager = null @@ -53,68 +57,108 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime) extends Logg val kafkaScheduler = new KafkaScheduler(config.backgroundThreads) var zkClient: ZkClient = null + newGauge( + "BrokerState", + new Gauge[Int] { + def value = brokerState.currentState + } + ) + /** * Start up API for bringing up a single instance of the Kafka server. 
* Instantiates the LogManager, the SocketServer and the request handlers - KafkaRequestHandlers */ def startup() { - info("starting") - isShuttingDown = new AtomicBoolean(false) - shutdownLatch = new CountDownLatch(1) + try { + info("starting") + brokerState.newState(Starting) + isShuttingDown = new AtomicBoolean(false) + shutdownLatch = new CountDownLatch(1) - /* start scheduler */ - kafkaScheduler.startup() + /* start scheduler */ + kafkaScheduler.startup() - /* setup zookeeper */ - zkClient = initZk() - - /* start log manager */ - logManager = createLogManager(zkClient) - logManager.startup() - - socketServer = new SocketServer(config.brokerId, - config.hostName, - config.port, - config.numNetworkThreads, - config.queuedMaxRequests, - config.socketSendBufferBytes, - config.socketReceiveBufferBytes, - config.socketRequestMaxBytes) - socketServer.startup() - - replicaManager = new ReplicaManager(config, time, zkClient, kafkaScheduler, logManager, isShuttingDown) - kafkaController = new KafkaController(config, zkClient) + /* setup zookeeper */ + zkClient = initZk() + + /* start log manager */ + logManager = createLogManager(zkClient, brokerState) + logManager.startup() + + socketServer = new SocketServer(config.brokerId, + config.hostName, + config.port, + config.numNetworkThreads, + config.queuedMaxRequests, + config.socketSendBufferBytes, + config.socketReceiveBufferBytes, + config.socketRequestMaxBytes, + config.maxConnectionsPerIp, + config.connectionsMaxIdleMs) + socketServer.startup() + + replicaManager = new ReplicaManager(config, time, zkClient, kafkaScheduler, logManager, isShuttingDown) + + /* start offset manager */ + offsetManager = createOffsetManager() + + kafkaController = new KafkaController(config, zkClient, brokerState) - /* start processing requests */ - apis = new KafkaApis(socketServer.requestChannel, replicaManager, zkClient, config.brokerId, config, kafkaController) - requestHandlerPool = new KafkaRequestHandlerPool(config.brokerId, socketServer.requestChannel, apis, config.numIoThreads) + /* start processing requests */ + apis = new KafkaApis(socketServer.requestChannel, replicaManager, offsetManager, zkClient, config.brokerId, config, kafkaController) + requestHandlerPool = new KafkaRequestHandlerPool(config.brokerId, socketServer.requestChannel, apis, config.numIoThreads) + brokerState.newState(RunningAsBroker) - Mx4jLoader.maybeLoad() + Mx4jLoader.maybeLoad() - replicaManager.startup() + replicaManager.startup() - kafkaController.startup() + kafkaController.startup() - topicConfigManager = new TopicConfigManager(zkClient, logManager) - topicConfigManager.startup() + topicConfigManager = new TopicConfigManager(zkClient, logManager) + topicConfigManager.startup() - /* tell everyone we are alive */ - kafkaHealthcheck = new KafkaHealthcheck(config.brokerId, config.advertisedHostName, config.advertisedPort, config.zkSessionTimeoutMs, zkClient) - kafkaHealthcheck.startup() + /* tell everyone we are alive */ + kafkaHealthcheck = new KafkaHealthcheck(config.brokerId, config.advertisedHostName, config.advertisedPort, config.zkSessionTimeoutMs, zkClient) + kafkaHealthcheck.startup() - registerStats() - startupComplete.set(true); - info("started") + registerStats() + startupComplete.set(true) + info("started") + } + catch { + case e: Throwable => + fatal("Fatal error during KafkaServer startup. 
Prepare to shutdown", e) + shutdown() + throw e + } } - + private def initZk(): ZkClient = { info("Connecting to zookeeper on " + config.zkConnect) + + val chroot = { + if (config.zkConnect.indexOf("/") > 0) + config.zkConnect.substring(config.zkConnect.indexOf("/")) + else + "" + } + + if (chroot.length > 1) { + val zkConnForChrootCreation = config.zkConnect.substring(0, config.zkConnect.indexOf("/")) + val zkClientForChrootCreation = new ZkClient(zkConnForChrootCreation, config.zkSessionTimeoutMs, config.zkConnectionTimeoutMs, ZKStringSerializer) + ZkUtils.makeSurePersistentPathExists(zkClientForChrootCreation, chroot) + info("Created zookeeper path " + chroot) + zkClientForChrootCreation.close() + } + val zkClient = new ZkClient(config.zkConnect, config.zkSessionTimeoutMs, config.zkConnectionTimeoutMs, ZKStringSerializer) ZkUtils.setupCommonPaths(zkClient) zkClient } + /** * Forces some dynamic jmx beans to be registered on server startup. */ @@ -134,10 +178,11 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime) extends Logg // the shutdown. var remainingRetries = config.controlledShutdownMaxRetries info("Starting controlled shutdown") - var channel : BlockingChannel = null; + var channel : BlockingChannel = null var prevController : Broker = null - var shutdownSuceeded : Boolean =false + var shutdownSuceeded : Boolean = false try { + brokerState.newState(PendingControlledShutdown) while (!shutdownSuceeded && remainingRetries > 0) { remainingRetries = remainingRetries - 1 @@ -172,6 +217,7 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime) extends Logg // send the controlled shutdown request val request = new ControlledShutdownRequest(correlationId.getAndIncrement, config.brokerId) channel.send(request) + response = channel.receive() val shutdownResponse = ControlledShutdownResponse.readFrom(response.buffer) if (shutdownResponse.errorCode == ErrorMapping.NoError && shutdownResponse.partitionsRemaining != null && @@ -188,6 +234,7 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime) extends Logg case ioe: java.io.IOException => channel.disconnect() channel = null + warn("Error during controlled shutdown, possibly because leader movement took longer than the configured socket.timeout.ms: %s".format(ioe.getMessage)) // ignore and try again } } @@ -214,29 +261,42 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime) extends Logg * Shuts down the LogManager, the SocketServer and the log cleaner scheduler thread */ def shutdown() { - info("shutting down") - val canShutdown = isShuttingDown.compareAndSet(false, true); - if (canShutdown) { - Utils.swallow(controlledShutdown()) - if(socketServer != null) - Utils.swallow(socketServer.shutdown()) - if(requestHandlerPool != null) - Utils.swallow(requestHandlerPool.shutdown()) - Utils.swallow(kafkaScheduler.shutdown()) - if(apis != null) - Utils.swallow(apis.close()) - if(replicaManager != null) - Utils.swallow(replicaManager.shutdown()) - if(logManager != null) - Utils.swallow(logManager.shutdown()) - if(kafkaController != null) - Utils.swallow(kafkaController.shutdown()) - if(zkClient != null) - Utils.swallow(zkClient.close()) - - shutdownLatch.countDown() - startupComplete.set(false); - info("shut down completed") + try { + info("shutting down") + val canShutdown = isShuttingDown.compareAndSet(false, true) + if (canShutdown) { + Utils.swallow(controlledShutdown()) + brokerState.newState(BrokerShuttingDown) + if(kafkaHealthcheck != null) + 
Utils.swallow(kafkaHealthcheck.shutdown()) + if(socketServer != null) + Utils.swallow(socketServer.shutdown()) + if(requestHandlerPool != null) + Utils.swallow(requestHandlerPool.shutdown()) + if(offsetManager != null) + offsetManager.shutdown() + Utils.swallow(kafkaScheduler.shutdown()) + if(apis != null) + Utils.swallow(apis.close()) + if(replicaManager != null) + Utils.swallow(replicaManager.shutdown()) + if(logManager != null) + Utils.swallow(logManager.shutdown()) + if(kafkaController != null) + Utils.swallow(kafkaController.shutdown()) + if(zkClient != null) + Utils.swallow(zkClient.close()) + + brokerState.newState(NotRunning) + shutdownLatch.countDown() + startupComplete.set(false) + info("shut down completed") + } + } + catch { + case e: Throwable => + fatal("Fatal error during KafkaServer shutdown.", e) + throw e } } @@ -247,9 +307,10 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime) extends Logg def getLogManager(): LogManager = logManager - private def createLogManager(zkClient: ZkClient): LogManager = { - val defaultLogConfig = LogConfig(segmentSize = config.logSegmentBytes, - segmentMs = 60L * 60L * 1000L * config.logRollHours, + private def createLogManager(zkClient: ZkClient, brokerState: BrokerState): LogManager = { + val defaultLogConfig = LogConfig(segmentSize = config.logSegmentBytes, + segmentMs = config.logRollTimeMillis, + segmentJitterMs = config.logRollTimeJitterMillis, flushInterval = config.logFlushIntervalMessages, flushMs = config.logFlushIntervalMs.toLong, retentionSize = config.logRetentionBytes, @@ -260,7 +321,7 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime) extends Logg deleteRetentionMs = config.logCleanerDeleteRetentionMs, fileDeleteDelayMs = config.logDeleteDelayMs, minCleanableRatio = config.logCleanerMinCleanRatio, - dedupe = config.logCleanupPolicy.trim.toLowerCase == "dedupe") + compact = config.logCleanupPolicy.trim.toLowerCase == "compact") val defaultProps = defaultLogConfig.toProps val configs = AdminUtils.fetchAllTopicConfigs(zkClient).mapValues(LogConfig.fromProps(defaultProps, _)) // read the log configurations from zookeeper @@ -276,13 +337,26 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime) extends Logg topicConfigs = configs, defaultConfig = defaultLogConfig, cleanerConfig = cleanerConfig, + ioThreads = config.numRecoveryThreadsPerDataDir, flushCheckMs = config.logFlushSchedulerIntervalMs, flushCheckpointMs = config.logFlushOffsetCheckpointIntervalMs, retentionCheckMs = config.logCleanupIntervalMs, scheduler = kafkaScheduler, + brokerState = brokerState, time = time) } -} + private def createOffsetManager(): OffsetManager = { + val offsetManagerConfig = OffsetManagerConfig( + maxMetadataSize = config.offsetMetadataMaxSize, + loadBufferSize = config.offsetsLoadBufferSize, + offsetsRetentionMs = config.offsetsRetentionMinutes * 60 * 1000L, + offsetsTopicNumPartitions = config.offsetsTopicPartitions, + offsetsTopicReplicationFactor = config.offsetsTopicReplicationFactor, + offsetCommitTimeoutMs = config.offsetCommitTimeoutMs, + offsetCommitRequiredAcks = config.offsetCommitRequiredAcks) + new OffsetManager(offsetManagerConfig, replicaManager, zkClient, kafkaScheduler) + } +} diff --git a/core/src/main/scala/kafka/server/KafkaServerStartable.scala b/core/src/main/scala/kafka/server/KafkaServerStartable.scala index acda52b801714..1c1b75b4137a8 100644 --- a/core/src/main/scala/kafka/server/KafkaServerStartable.scala +++ b/core/src/main/scala/kafka/server/KafkaServerStartable.scala 
@@ -17,26 +17,22 @@ package kafka.server +import kafka.common.AppInfo import kafka.utils.Logging class KafkaServerStartable(val serverConfig: KafkaConfig) extends Logging { - private var server : KafkaServer = null - - init - - private def init() { - server = new KafkaServer(serverConfig) - } + private val server = new KafkaServer(serverConfig) def startup() { try { server.startup() + AppInfo.registerInfo() } catch { case e: Throwable => - fatal("Fatal error during KafkaServerStable startup. Prepare to shutdown", e) - shutdown() + fatal("Fatal error during KafkaServerStartable startup. Prepare to shutdown", e) + // KafkaServer already calls shutdown() internally, so this is purely for logging & the exit code System.exit(1) } } @@ -52,6 +48,14 @@ class KafkaServerStartable(val serverConfig: KafkaConfig) extends Logging { } } + /** + * Allow setting broker state from the startable. + * This is needed when a custom kafka server startable want to emit new states that it introduces. + */ + def setServerState(newState: Byte) { + server.brokerState.newState(newState) + } + def awaitShutdown() = server.awaitShutdown diff --git a/core/src/main/scala/kafka/server/LogOffsetMetadata.scala b/core/src/main/scala/kafka/server/LogOffsetMetadata.scala new file mode 100644 index 0000000000000..a868334e0e53d --- /dev/null +++ b/core/src/main/scala/kafka/server/LogOffsetMetadata.scala @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.server + +import org.apache.kafka.common.KafkaException + +object LogOffsetMetadata { + val UnknownOffsetMetadata = new LogOffsetMetadata(-1, 0, 0) + val UnknownSegBaseOffset = -1L + val UnknownFilePosition = -1 + + class OffsetOrdering extends Ordering[LogOffsetMetadata] { + override def compare(x: LogOffsetMetadata , y: LogOffsetMetadata ): Int = { + return x.offsetDiff(y).toInt + } + } + +} + +/* + * A log offset structure, including: + * 1. the message offset + * 2. the base message offset of the located segment + * 3. 
the physical position on the located segment + */ +case class LogOffsetMetadata(messageOffset: Long, + segmentBaseOffset: Long = LogOffsetMetadata.UnknownSegBaseOffset, + relativePositionInSegment: Int = LogOffsetMetadata.UnknownFilePosition) { + + // check if this offset is already on an older segment compared with the given offset + def offsetOnOlderSegment(that: LogOffsetMetadata): Boolean = { + if (messageOffsetOnly()) + throw new KafkaException("%s cannot compare its segment info with %s since it only has message offset info".format(this, that)) + + this.segmentBaseOffset < that.segmentBaseOffset + } + + // check if this offset is on the same segment with the given offset + def offsetOnSameSegment(that: LogOffsetMetadata): Boolean = { + if (messageOffsetOnly()) + throw new KafkaException("%s cannot compare its segment info with %s since it only has message offset info".format(this, that)) + + this.segmentBaseOffset == that.segmentBaseOffset + } + + // check if this offset is before the given offset + def precedes(that: LogOffsetMetadata): Boolean = this.messageOffset < that.messageOffset + + // compute the number of messages between this offset to the given offset + def offsetDiff(that: LogOffsetMetadata): Long = { + this.messageOffset - that.messageOffset + } + + // compute the number of bytes between this offset to the given offset + // if they are on the same segment and this offset precedes the given offset + def positionDiff(that: LogOffsetMetadata): Int = { + if(!offsetOnSameSegment(that)) + throw new KafkaException("%s cannot compare its segment position with %s since they are not on the same segment".format(this, that)) + if(messageOffsetOnly()) + throw new KafkaException("%s cannot compare its segment position with %s since it only has message offset info".format(this, that)) + + this.relativePositionInSegment - that.relativePositionInSegment + } + + // decide if the offset metadata only contains message offset info + def messageOffsetOnly(): Boolean = { + segmentBaseOffset == LogOffsetMetadata.UnknownSegBaseOffset && relativePositionInSegment == LogOffsetMetadata.UnknownFilePosition + } + + override def toString = messageOffset.toString + " [" + segmentBaseOffset + " : " + relativePositionInSegment + "]" + +} diff --git a/core/src/main/scala/kafka/server/MetadataCache.scala b/core/src/main/scala/kafka/server/MetadataCache.scala new file mode 100644 index 0000000000000..bf81a1ab88c14 --- /dev/null +++ b/core/src/main/scala/kafka/server/MetadataCache.scala @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package kafka.server + +import scala.collection.{Seq, Set, mutable} +import kafka.api._ +import kafka.cluster.Broker +import java.util.concurrent.locks.ReentrantReadWriteLock +import kafka.utils.Utils._ +import kafka.common.{ErrorMapping, ReplicaNotAvailableException, LeaderNotAvailableException} +import kafka.common.TopicAndPartition +import kafka.controller.KafkaController.StateChangeLogger + +/** + * A cache for the state (e.g., current leader) of each partition. This cache is updated through + * UpdateMetadataRequest from the controller. Every broker maintains the same cache, asynchronously. + */ +private[server] class MetadataCache { + private val cache: mutable.Map[String, mutable.Map[Int, PartitionStateInfo]] = + new mutable.HashMap[String, mutable.Map[Int, PartitionStateInfo]]() + private var aliveBrokers: Map[Int, Broker] = Map() + private val partitionMetadataLock = new ReentrantReadWriteLock() + + def getTopicMetadata(topics: Set[String]) = { + val isAllTopics = topics.isEmpty + val topicsRequested = if(isAllTopics) cache.keySet else topics + val topicResponses: mutable.ListBuffer[TopicMetadata] = new mutable.ListBuffer[TopicMetadata] + inReadLock(partitionMetadataLock) { + for (topic <- topicsRequested) { + if (isAllTopics || cache.contains(topic)) { + val partitionStateInfos = cache(topic) + val partitionMetadata = partitionStateInfos.map { + case (partitionId, partitionState) => + val replicas = partitionState.allReplicas + val replicaInfo: Seq[Broker] = replicas.map(aliveBrokers.getOrElse(_, null)).filter(_ != null).toSeq + var leaderInfo: Option[Broker] = None + var isrInfo: Seq[Broker] = Nil + val leaderIsrAndEpoch = partitionState.leaderIsrAndControllerEpoch + val leader = leaderIsrAndEpoch.leaderAndIsr.leader + val isr = leaderIsrAndEpoch.leaderAndIsr.isr + val topicPartition = TopicAndPartition(topic, partitionId) + try { + leaderInfo = aliveBrokers.get(leader) + if (!leaderInfo.isDefined) + throw new LeaderNotAvailableException("Leader not available for %s".format(topicPartition)) + isrInfo = isr.map(aliveBrokers.getOrElse(_, null)).filter(_ != null) + if (replicaInfo.size < replicas.size) + throw new ReplicaNotAvailableException("Replica information not available for following brokers: " + + replicas.filterNot(replicaInfo.map(_.id).contains(_)).mkString(",")) + if (isrInfo.size < isr.size) + throw new ReplicaNotAvailableException("In Sync Replica information not available for following brokers: " + + isr.filterNot(isrInfo.map(_.id).contains(_)).mkString(",")) + new PartitionMetadata(partitionId, leaderInfo, replicaInfo, isrInfo, ErrorMapping.NoError) + } catch { + case e: Throwable => + debug("Error while fetching metadata for %s. 
Possible cause: %s".format(topicPartition, e.getMessage)) + new PartitionMetadata(partitionId, leaderInfo, replicaInfo, isrInfo, + ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]])) + } + } + topicResponses += new TopicMetadata(topic, partitionMetadata.toSeq) + } + } + } + topicResponses + } + + def getAliveBrokers = { + inReadLock(partitionMetadataLock) { + aliveBrokers.values.toSeq + } + } + + def addOrUpdatePartitionInfo(topic: String, + partitionId: Int, + stateInfo: PartitionStateInfo) { + inWriteLock(partitionMetadataLock) { + cache.get(topic) match { + case Some(infos) => infos.put(partitionId, stateInfo) + case None => { + val newInfos: mutable.Map[Int, PartitionStateInfo] = new mutable.HashMap[Int, PartitionStateInfo] + cache.put(topic, newInfos) + newInfos.put(partitionId, stateInfo) + } + } + } + } + + def getPartitionInfo(topic: String, partitionId: Int): Option[PartitionStateInfo] = { + inReadLock(partitionMetadataLock) { + cache.get(topic) match { + case Some(partitionInfos) => partitionInfos.get(partitionId) + case None => None + } + } + } + + def updateCache(updateMetadataRequest: UpdateMetadataRequest, + brokerId: Int, + stateChangeLogger: StateChangeLogger) { + inWriteLock(partitionMetadataLock) { + aliveBrokers = updateMetadataRequest.aliveBrokers.map(b => (b.id, b)).toMap + updateMetadataRequest.partitionStateInfos.foreach { case(tp, info) => + if (info.leaderIsrAndControllerEpoch.leaderAndIsr.leader == LeaderAndIsr.LeaderDuringDelete) { + removePartitionInfo(tp.topic, tp.partition) + stateChangeLogger.trace(("Broker %d deleted partition %s from metadata cache in response to UpdateMetadata request " + + "sent by controller %d epoch %d with correlation id %d") + .format(brokerId, tp, updateMetadataRequest.controllerId, + updateMetadataRequest.controllerEpoch, updateMetadataRequest.correlationId)) + } else { + addOrUpdatePartitionInfo(tp.topic, tp.partition, info) + stateChangeLogger.trace(("Broker %d cached leader info %s for partition %s in response to UpdateMetadata request " + + "sent by controller %d epoch %d with correlation id %d") + .format(brokerId, info, tp, updateMetadataRequest.controllerId, + updateMetadataRequest.controllerEpoch, updateMetadataRequest.correlationId)) + } + } + } + } + + private def removePartitionInfo(topic: String, partitionId: Int) = { + cache.get(topic) match { + case Some(infos) => { + infos.remove(partitionId) + if(infos.isEmpty) { + cache.remove(topic) + } + true + } + case None => false + } + } +} + diff --git a/core/src/main/scala/kafka/server/OffsetCheckpoint.scala b/core/src/main/scala/kafka/server/OffsetCheckpoint.scala index 19f61a9718a7f..8c5b0546908d3 100644 --- a/core/src/main/scala/kafka/server/OffsetCheckpoint.scala +++ b/core/src/main/scala/kafka/server/OffsetCheckpoint.scala @@ -34,7 +34,8 @@ class OffsetCheckpoint(val file: File) extends Logging { // write to temp file and then swap with the existing file val temp = new File(file.getAbsolutePath + ".tmp") - val writer = new BufferedWriter(new FileWriter(temp)) + val fileOutputStream = new FileOutputStream(temp) + val writer = new BufferedWriter(new OutputStreamWriter(fileOutputStream)) try { // write the current version writer.write(0.toString) @@ -50,8 +51,9 @@ class OffsetCheckpoint(val file: File) extends Logging { writer.newLine() } - // flush and overwrite old file + // flush the buffer and then fsync the underlying file writer.flush() + fileOutputStream.getFD().sync() } finally { writer.close() } @@ -90,7 +92,7 @@ class OffsetCheckpoint(val file: File) 
extends Logging { val topic = pieces(0) val partition = pieces(1).toInt val offset = pieces(2).toLong - offsets += (TopicAndPartition(pieces(0), partition) -> offset) + offsets += (TopicAndPartition(topic, partition) -> offset) line = reader.readLine() } if(offsets.size != expectedSize) diff --git a/core/src/main/scala/kafka/server/OffsetManager.scala b/core/src/main/scala/kafka/server/OffsetManager.scala new file mode 100644 index 0000000000000..3c79428962604 --- /dev/null +++ b/core/src/main/scala/kafka/server/OffsetManager.scala @@ -0,0 +1,564 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.server + +import org.apache.kafka.common.protocol.types.{Struct, Schema, Field} +import org.apache.kafka.common.protocol.types.Type.STRING +import org.apache.kafka.common.protocol.types.Type.INT32 +import org.apache.kafka.common.protocol.types.Type.INT64 + +import kafka.utils._ +import kafka.common._ +import kafka.log.{FileMessageSet, LogConfig} +import kafka.message._ +import kafka.metrics.KafkaMetricsGroup +import kafka.common.TopicAndPartition +import kafka.tools.MessageFormatter +import kafka.api.ProducerResponseStatus + +import scala.Some +import scala.collection._ +import java.io.PrintStream +import java.util.concurrent.atomic.AtomicBoolean +import java.nio.ByteBuffer +import java.util.Properties +import java.util.concurrent.TimeUnit + +import com.yammer.metrics.core.Gauge +import org.I0Itec.zkclient.ZkClient + + +/** + * Configuration settings for in-built offset management + * @param maxMetadataSize The maximum allowed metadata for any offset commit. + * @param loadBufferSize Batch size for reading from the offsets segments when loading offsets into the cache. + * @param offsetsRetentionMs Offsets older than this retention period will be discarded. + * @param offsetsRetentionCheckIntervalMs Frequency at which to check for stale offsets. + * @param offsetsTopicNumPartitions The number of partitions for the offset commit topic (should not change after deployment). + * @param offsetsTopicSegmentBytes The offsets topic segment bytes should be kept relatively small to facilitate faster + * log compaction and faster offset loads + * @param offsetsTopicReplicationFactor The replication factor for the offset commit topic (set higher to ensure availability). + * @param offsetsTopicCompressionCodec Compression codec for the offsets topic - compression should be turned on in + * order to achieve "atomic" commits. + * @param offsetCommitTimeoutMs The offset commit will be delayed until all replicas for the offsets topic receive the + * commit or this timeout is reached. (Similar to the producer request timeout.) + * @param offsetCommitRequiredAcks The required acks before the commit can be accepted. 
In general, the default (-1) + * should not be overridden. + */ +case class OffsetManagerConfig(maxMetadataSize: Int = OffsetManagerConfig.DefaultMaxMetadataSize, + loadBufferSize: Int = OffsetManagerConfig.DefaultLoadBufferSize, + offsetsRetentionMs: Long = 24*60*60000L, + offsetsRetentionCheckIntervalMs: Long = OffsetManagerConfig.DefaultOffsetsRetentionCheckIntervalMs, + offsetsTopicNumPartitions: Int = OffsetManagerConfig.DefaultOffsetsTopicNumPartitions, + offsetsTopicSegmentBytes: Int = OffsetManagerConfig.DefaultOffsetsTopicSegmentBytes, + offsetsTopicReplicationFactor: Short = OffsetManagerConfig.DefaultOffsetsTopicReplicationFactor, + offsetsTopicCompressionCodec: CompressionCodec = OffsetManagerConfig.DefaultOffsetsTopicCompressionCodec, + offsetCommitTimeoutMs: Int = OffsetManagerConfig.DefaultOffsetCommitTimeoutMs, + offsetCommitRequiredAcks: Short = OffsetManagerConfig.DefaultOffsetCommitRequiredAcks) + +object OffsetManagerConfig { + val DefaultMaxMetadataSize = 4096 + val DefaultLoadBufferSize = 5*1024*1024 + val DefaultOffsetsRetentionCheckIntervalMs = 600000L + val DefaultOffsetsTopicNumPartitions = 1 + val DefaultOffsetsTopicSegmentBytes = 100*1024*1024 + val DefaultOffsetsTopicReplicationFactor = 1.toShort + val DefaultOffsetsTopicCompressionCodec = NoCompressionCodec + val DefaultOffsetCommitTimeoutMs = 5000 + val DefaultOffsetCommitRequiredAcks = (-1).toShort +} + +class OffsetManager(val config: OffsetManagerConfig, + replicaManager: ReplicaManager, + zkClient: ZkClient, + scheduler: Scheduler) extends Logging with KafkaMetricsGroup { + + /* offsets and metadata cache */ + private val offsetsCache = new Pool[GroupTopicPartition, OffsetAndMetadata] + private val followerTransitionLock = new Object + + private val loadingPartitions: mutable.Set[Int] = mutable.Set() + + private val shuttingDown = new AtomicBoolean(false) + + scheduler.schedule(name = "offsets-cache-compactor", + fun = compact, + period = config.offsetsRetentionCheckIntervalMs, + unit = TimeUnit.MILLISECONDS) + + newGauge("NumOffsets", + new Gauge[Int] { + def value = offsetsCache.size + } + ) + + newGauge("NumGroups", + new Gauge[Int] { + def value = offsetsCache.keys.map(_.group).toSet.size + } + ) + + private def compact() { + debug("Compacting offsets cache.") + val startMs = SystemTime.milliseconds + + val staleOffsets = offsetsCache.filter(startMs - _._2.timestamp > config.offsetsRetentionMs) + + debug("Found %d stale offsets (older than %d ms).".format(staleOffsets.size, config.offsetsRetentionMs)) + + // delete the stale offsets from the table and generate tombstone messages to remove them from the log + val tombstonesForPartition = staleOffsets.map { case(groupTopicAndPartition, offsetAndMetadata) => + val offsetsPartition = partitionFor(groupTopicAndPartition.group) + trace("Removing stale offset and metadata for %s: %s".format(groupTopicAndPartition, offsetAndMetadata)) + + offsetsCache.remove(groupTopicAndPartition) + + val commitKey = OffsetManager.offsetCommitKey(groupTopicAndPartition.group, + groupTopicAndPartition.topicPartition.topic, groupTopicAndPartition.topicPartition.partition) + + (offsetsPartition, new Message(bytes = null, key = commitKey)) + }.groupBy{ case (partition, tombstone) => partition } + + // Append the tombstone messages to the offset partitions. It is okay if the replicas don't receive these (say, + // if we crash or leaders move) since the new leaders will get rid of stale offsets during their own purge cycles. 
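// Illustrative sketch, not part of the original patch: a "tombstone" on __consumer_offsets is
// simply a message whose key identifies the (group, topic, partition) commit and whose payload is
// null; log compaction later drops both the tombstone and the older commit records sharing that
// key. Using the helpers defined in this file (OffsetManager.offsetCommitKey, partitionFor) and a
// hypothetical group/topic, expiring a single stale entry boils down to:
//
//   val commitKey        = OffsetManager.offsetCommitKey("my-group", "my-topic", 0)
//   val tombstone        = new Message(bytes = null, key = commitKey) // null payload marks deletion
//   val offsetsPartition = partitionFor("my-group")                   // abs(hashCode) % offsetsTopicNumPartitions
//
// The tombstone is then appended to the leader replica of that offsets-topic partition, as done
// for the whole batch below.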
+ val numRemoved = tombstonesForPartition.flatMap { case(offsetsPartition, tombstones) => + val partitionOpt = replicaManager.getPartition(OffsetManager.OffsetsTopicName, offsetsPartition) + partitionOpt.map { partition => + val appendPartition = TopicAndPartition(OffsetManager.OffsetsTopicName, offsetsPartition) + val messages = tombstones.map(_._2).toSeq + + trace("Marked %d offsets in %s for deletion.".format(messages.size, appendPartition)) + + try { + // do not need to require acks since even if the tombsone is lost, + // it will be appended again in the next purge cycle + partition.appendMessagesToLeader(new ByteBufferMessageSet(config.offsetsTopicCompressionCodec, messages:_*)) + tombstones.size + } + catch { + case t: Throwable => + error("Failed to mark %d stale offsets for deletion in %s.".format(messages.size, appendPartition), t) + // ignore and continue + 0 + } + } + }.sum + + debug("Removed %d stale offsets in %d milliseconds.".format(numRemoved, SystemTime.milliseconds - startMs)) + } + + def offsetsTopicConfig: Properties = { + val props = new Properties + props.put(LogConfig.SegmentBytesProp, config.offsetsTopicSegmentBytes.toString) + props.put(LogConfig.CleanupPolicyProp, "compact") + props + } + + def partitionFor(group: String): Int = Utils.abs(group.hashCode) % config.offsetsTopicNumPartitions + + /** + * Fetch the current offset for the given group/topic/partition from the underlying offsets storage. + * + * @param key The requested group-topic-partition + * @return If the key is present, return the offset and metadata; otherwise return None + */ + private def getOffset(key: GroupTopicPartition) = { + val offsetAndMetadata = offsetsCache.get(key) + if (offsetAndMetadata == null) + OffsetMetadataAndError.NoOffset + else + OffsetMetadataAndError(offsetAndMetadata.offset, offsetAndMetadata.metadata, ErrorMapping.NoError) + } + + /** + * Put the (already committed) offset for the given group/topic/partition into the cache. 
+ * + * @param key The group-topic-partition + * @param offsetAndMetadata The offset/metadata to be stored + */ + private def putOffset(key: GroupTopicPartition, offsetAndMetadata: OffsetAndMetadata) { + offsetsCache.put(key, offsetAndMetadata) + } + + /* + * Check if the offset metadata length is valid + */ + def validateOffsetMetadataLength(metadata: String) : Boolean = { + metadata == null || metadata.length() <= config.maxMetadataSize + } + + /** + * Store offsets by appending it to the replicated log and then inserting to cache + */ + // TODO: generation id and consumer id is needed by coordinator to do consumer checking in the future + def storeOffsets(groupId: String, + consumerId: String, + generationId: Int, + offsetMetadata: immutable.Map[TopicAndPartition, OffsetAndMetadata], + responseCallback: immutable.Map[TopicAndPartition, Short] => Unit) { + + // first filter out partitions with offset metadata size exceeding limit + // TODO: in the future we may want to only support atomic commit and hence fail the whole commit + val filteredOffsetMetadata = offsetMetadata.filter { case (topicAndPartition, offsetAndMetadata) => + validateOffsetMetadataLength(offsetAndMetadata.metadata) + } + + // construct the message set to append + val messages = filteredOffsetMetadata.map { case (topicAndPartition, offsetAndMetadata) => + new Message( + key = OffsetManager.offsetCommitKey(groupId, topicAndPartition.topic, topicAndPartition.partition), + bytes = OffsetManager.offsetCommitValue(offsetAndMetadata) + ) + }.toSeq + + val offsetTopicPartition = TopicAndPartition(OffsetManager.OffsetsTopicName, partitionFor(groupId)) + + val offsetsAndMetadataMessageSet = Map(offsetTopicPartition -> + new ByteBufferMessageSet(config.offsetsTopicCompressionCodec, messages:_*)) + + // set the callback function to insert offsets into cache after log append completed + def putCacheCallback(responseStatus: Map[TopicAndPartition, ProducerResponseStatus]) { + // the append response should only contain the topics partition + if (responseStatus.size != 1 || ! 
responseStatus.contains(offsetTopicPartition)) + throw new IllegalStateException("Append status %s should only have one partition %s" + .format(responseStatus, offsetTopicPartition)) + + // construct the commit response status and insert + // the offset and metadata to cache iff the append status has no error + val status = responseStatus(offsetTopicPartition) + + val responseCode = + if (status.error == ErrorMapping.NoError) { + filteredOffsetMetadata.foreach { case (topicAndPartition, offsetAndMetadata) => + putOffset(GroupTopicPartition(groupId, topicAndPartition), offsetAndMetadata) + } + ErrorMapping.NoError + } else { + debug("Offset commit %s from group %s consumer %s with generation %d failed when appending to log due to %s" + .format(filteredOffsetMetadata, groupId, consumerId, generationId, ErrorMapping.exceptionNameFor(status.error))) + + // transform the log append error code to the corresponding the commit status error code + if (status.error == ErrorMapping.UnknownTopicOrPartitionCode) + ErrorMapping.ConsumerCoordinatorNotAvailableCode + else if (status.error == ErrorMapping.NotLeaderForPartitionCode) + ErrorMapping.NotCoordinatorForConsumerCode + else + status.error + } + + + // compute the final error codes for the commit response + val commitStatus = offsetMetadata.map { case (topicAndPartition, offsetAndMetadata) => + if (validateOffsetMetadataLength(offsetAndMetadata.metadata)) + (topicAndPartition, responseCode) + else + (topicAndPartition, ErrorMapping.OffsetMetadataTooLargeCode) + } + + // finally trigger the callback logic passed from the API layer + responseCallback(commitStatus) + } + + // call replica manager to append the offset messages + replicaManager.appendMessages( + config.offsetCommitTimeoutMs.toLong, + config.offsetCommitRequiredAcks, + true, // allow appending to internal offset topic + offsetsAndMetadataMessageSet, + putCacheCallback) + } + + /** + * The most important guarantee that this API provides is that it should never return a stale offset. i.e., it either + * returns the current offset or it begins to sync the cache from the log (and returns an error code). + */ + def getOffsets(group: String, topicPartitions: Seq[TopicAndPartition]): Map[TopicAndPartition, OffsetMetadataAndError] = { + trace("Getting offsets %s for group %s.".format(topicPartitions, group)) + + val offsetsPartition = partitionFor(group) + + /** + * followerTransitionLock protects against fetching from an empty/cleared offset cache (i.e., cleared due to a + * leader->follower transition). i.e., even if leader-is-local is true a follower transition can occur right after + * the check and clear the cache. i.e., we would read from the empty cache and incorrectly return NoOffset. + */ + followerTransitionLock synchronized { + if (leaderIsLocal(offsetsPartition)) { + if (loadingPartitions synchronized loadingPartitions.contains(offsetsPartition)) { + debug("Cannot fetch offsets for group %s due to ongoing offset load.".format(group)) + topicPartitions.map { topicAndPartition => + val groupTopicPartition = GroupTopicPartition(group, topicAndPartition) + (groupTopicPartition.topicPartition, OffsetMetadataAndError.OffsetsLoading) + }.toMap + } else { + if (topicPartitions.size == 0) { + // Return offsets for all partitions owned by this consumer group. (this only applies to consumers that commit offsets to Kafka.) 
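// Illustrative sketch, not part of the original patch: two typical ways a caller would use
// getOffsets, assuming an OffsetManager instance named offsetManager and a hypothetical group
// "my-group". An empty partition list returns every offset the group has committed to Kafka,
// while an explicit list restricts the lookup to those partitions.
//
//   val all      = offsetManager.getOffsets("my-group", Seq.empty)
//   val specific = offsetManager.getOffsets("my-group", Seq(TopicAndPartition("my-topic", 0)))
//
// Both calls return a Map[TopicAndPartition, OffsetMetadataAndError]; if the cache for the group's
// offsets partition is still being loaded, OffsetMetadataAndError.OffsetsLoading is returned
// rather than a potentially stale offset.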
+ offsetsCache.filter(_._1.group == group).map { case(groupTopicPartition, offsetAndMetadata) => + (groupTopicPartition.topicPartition, OffsetMetadataAndError(offsetAndMetadata.offset, offsetAndMetadata.metadata, ErrorMapping.NoError)) + }.toMap + } else { + topicPartitions.map { topicAndPartition => + val groupTopicPartition = GroupTopicPartition(group, topicAndPartition) + (groupTopicPartition.topicPartition, getOffset(groupTopicPartition)) + }.toMap + } + } + } else { + debug("Could not fetch offsets for group %s (not offset coordinator).".format(group)) + topicPartitions.map { topicAndPartition => + val groupTopicPartition = GroupTopicPartition(group, topicAndPartition) + (groupTopicPartition.topicPartition, OffsetMetadataAndError.NotOffsetManagerForGroup) + }.toMap + } + } + } + + /** + * Asynchronously read the partition from the offsets topic and populate the cache + */ + def loadOffsetsFromLog(offsetsPartition: Int) { + + val topicPartition = TopicAndPartition(OffsetManager.OffsetsTopicName, offsetsPartition) + + loadingPartitions synchronized { + if (loadingPartitions.contains(offsetsPartition)) { + info("Offset load from %s already in progress.".format(topicPartition)) + } else { + loadingPartitions.add(offsetsPartition) + scheduler.schedule(topicPartition.toString, loadOffsets) + } + } + + def loadOffsets() { + info("Loading offsets from " + topicPartition) + + val startMs = SystemTime.milliseconds + try { + replicaManager.logManager.getLog(topicPartition) match { + case Some(log) => + var currOffset = log.logSegments.head.baseOffset + val buffer = ByteBuffer.allocate(config.loadBufferSize) + // loop breaks if leader changes at any time during the load, since getHighWatermark is -1 + while (currOffset < getHighWatermark(offsetsPartition) && !shuttingDown.get()) { + buffer.clear() + val messages = log.read(currOffset, config.loadBufferSize).messageSet.asInstanceOf[FileMessageSet] + messages.readInto(buffer, 0) + val messageSet = new ByteBufferMessageSet(buffer) + messageSet.foreach { msgAndOffset => + require(msgAndOffset.message.key != null, "Offset entry key should not be null") + val key = OffsetManager.readMessageKey(msgAndOffset.message.key) + if (msgAndOffset.message.payload == null) { + if (offsetsCache.remove(key) != null) + trace("Removed offset for %s due to tombstone entry.".format(key)) + else + trace("Ignoring redundant tombstone for %s.".format(key)) + } else { + val value = OffsetManager.readMessageValue(msgAndOffset.message.payload) + putOffset(key, value) + trace("Loaded offset %s for %s.".format(value, key)) + } + currOffset = msgAndOffset.nextOffset + } + } + + if (!shuttingDown.get()) + info("Finished loading offsets from %s in %d milliseconds." 
+ .format(topicPartition, SystemTime.milliseconds - startMs)) + case None => + warn("No log found for " + topicPartition) + } + } + catch { + case t: Throwable => + error("Error in loading offsets from " + topicPartition, t) + } + finally { + loadingPartitions synchronized loadingPartitions.remove(offsetsPartition) + } + } + } + + private def getHighWatermark(partitionId: Int): Long = { + val partitionOpt = replicaManager.getPartition(OffsetManager.OffsetsTopicName, partitionId) + + val hw = partitionOpt.map { partition => + partition.leaderReplicaIfLocal().map(_.highWatermark.messageOffset).getOrElse(-1L) + }.getOrElse(-1L) + + hw + } + + private def leaderIsLocal(partition: Int) = { getHighWatermark(partition) != -1L } + + /** + * When this broker becomes a follower for an offsets topic partition clear out the cache for groups that belong to + * that partition. + * @param offsetsPartition Groups belonging to this partition of the offsets topic will be deleted from the cache. + */ + def clearOffsetsInPartition(offsetsPartition: Int) { + debug("Deleting offset entries belonging to [%s,%d].".format(OffsetManager.OffsetsTopicName, offsetsPartition)) + + followerTransitionLock synchronized { + offsetsCache.keys.foreach { key => + if (partitionFor(key.group) == offsetsPartition) { + offsetsCache.remove(key) + } + } + } + } + + def shutdown() { + shuttingDown.set(true) + } + +} + +object OffsetManager { + + val OffsetsTopicName = "__consumer_offsets" + + private case class KeyAndValueSchemas(keySchema: Schema, valueSchema: Schema) + + private val CURRENT_OFFSET_SCHEMA_VERSION = 0.toShort + + private val OFFSET_COMMIT_KEY_SCHEMA_V0 = new Schema(new Field("group", STRING), + new Field("topic", STRING), + new Field("partition", INT32)) + private val KEY_GROUP_FIELD = OFFSET_COMMIT_KEY_SCHEMA_V0.get("group") + private val KEY_TOPIC_FIELD = OFFSET_COMMIT_KEY_SCHEMA_V0.get("topic") + private val KEY_PARTITION_FIELD = OFFSET_COMMIT_KEY_SCHEMA_V0.get("partition") + + private val OFFSET_COMMIT_VALUE_SCHEMA_V0 = new Schema(new Field("offset", INT64), + new Field("metadata", STRING, "Associated metadata.", ""), + new Field("timestamp", INT64)) + private val VALUE_OFFSET_FIELD = OFFSET_COMMIT_VALUE_SCHEMA_V0.get("offset") + private val VALUE_METADATA_FIELD = OFFSET_COMMIT_VALUE_SCHEMA_V0.get("metadata") + private val VALUE_TIMESTAMP_FIELD = OFFSET_COMMIT_VALUE_SCHEMA_V0.get("timestamp") + + // map of versions to schemas + private val OFFSET_SCHEMAS = Map(0 -> KeyAndValueSchemas(OFFSET_COMMIT_KEY_SCHEMA_V0, OFFSET_COMMIT_VALUE_SCHEMA_V0)) + + private val CURRENT_SCHEMA = schemaFor(CURRENT_OFFSET_SCHEMA_VERSION) + + private def schemaFor(version: Int) = { + val schemaOpt = OFFSET_SCHEMAS.get(version) + schemaOpt match { + case Some(schema) => schema + case _ => throw new KafkaException("Unknown offset schema version " + version) + } + } + + /** + * Generates the key for offset commit message for given (group, topic, partition) + * + * @return key for offset commit message + */ + def offsetCommitKey(group: String, topic: String, partition: Int, versionId: Short = 0): Array[Byte] = { + val key = new Struct(CURRENT_SCHEMA.keySchema) + key.set(KEY_GROUP_FIELD, group) + key.set(KEY_TOPIC_FIELD, topic) + key.set(KEY_PARTITION_FIELD, partition) + + val byteBuffer = ByteBuffer.allocate(2 /* version */ + key.sizeOf) + byteBuffer.putShort(CURRENT_OFFSET_SCHEMA_VERSION) + key.writeTo(byteBuffer) + byteBuffer.array() + } + + /** + * Generates the payload for offset commit message from given offset and metadata + * + * 
@param offsetAndMetadata consumer's current offset and metadata + * @return payload for offset commit message + */ + def offsetCommitValue(offsetAndMetadata: OffsetAndMetadata): Array[Byte] = { + val value = new Struct(CURRENT_SCHEMA.valueSchema) + value.set(VALUE_OFFSET_FIELD, offsetAndMetadata.offset) + value.set(VALUE_METADATA_FIELD, offsetAndMetadata.metadata) + value.set(VALUE_TIMESTAMP_FIELD, offsetAndMetadata.timestamp) + + val byteBuffer = ByteBuffer.allocate(2 /* version */ + value.sizeOf) + byteBuffer.putShort(CURRENT_OFFSET_SCHEMA_VERSION) + value.writeTo(byteBuffer) + byteBuffer.array() + } + + /** + * Decodes the offset messages' key + * + * @param buffer input byte-buffer + * @return an GroupTopicPartition object + */ + def readMessageKey(buffer: ByteBuffer): GroupTopicPartition = { + val version = buffer.getShort() + val keySchema = schemaFor(version).keySchema + val key = keySchema.read(buffer).asInstanceOf[Struct] + + val group = key.get(KEY_GROUP_FIELD).asInstanceOf[String] + val topic = key.get(KEY_TOPIC_FIELD).asInstanceOf[String] + val partition = key.get(KEY_PARTITION_FIELD).asInstanceOf[Int] + + GroupTopicPartition(group, TopicAndPartition(topic, partition)) + } + + /** + * Decodes the offset messages' payload and retrieves offset and metadata from it + * + * @param buffer input byte-buffer + * @return an offset-metadata object from the message + */ + def readMessageValue(buffer: ByteBuffer): OffsetAndMetadata = { + if(buffer == null) { // tombstone + null + } else { + val version = buffer.getShort() + val valueSchema = schemaFor(version).valueSchema + val value = valueSchema.read(buffer).asInstanceOf[Struct] + + val offset = value.get(VALUE_OFFSET_FIELD).asInstanceOf[Long] + val metadata = value.get(VALUE_METADATA_FIELD).asInstanceOf[String] + val timestamp = value.get(VALUE_TIMESTAMP_FIELD).asInstanceOf[Long] + + OffsetAndMetadata(offset, metadata, timestamp) + } + } + + // Formatter for use with tools such as console consumer: Consumer should also set exclude.internal.topics to false. 
+ // (specify --formatter "kafka.server.OffsetManager\$OffsetsMessageFormatter" when consuming __consumer_offsets) + class OffsetsMessageFormatter extends MessageFormatter { + def writeTo(key: Array[Byte], value: Array[Byte], output: PrintStream) { + val formattedKey = if (key == null) "NULL" else OffsetManager.readMessageKey(ByteBuffer.wrap(key)).toString + val formattedValue = if (value == null) "NULL" else OffsetManager.readMessageValue(ByteBuffer.wrap(value)).toString + output.write(formattedKey.getBytes) + output.write("::".getBytes) + output.write(formattedValue.getBytes) + output.write("\n".getBytes) + } + } + +} + +case class GroupTopicPartition(group: String, topicPartition: TopicAndPartition) { + + def this(group: String, topic: String, partition: Int) = + this(group, new TopicAndPartition(topic, partition)) + + override def toString = + "[%s,%s,%d]".format(group, topicPartition.topic, topicPartition.partition) + +} diff --git a/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala b/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala index 73e605eb31bc7..6879e73028218 100644 --- a/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala +++ b/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala @@ -17,7 +17,9 @@ package kafka.server +import kafka.admin.AdminUtils import kafka.cluster.Broker +import kafka.log.LogConfig import kafka.message.ByteBufferMessageSet import kafka.api.{OffsetRequest, FetchResponsePartitionData} import kafka.common.{KafkaStorageException, TopicAndPartition} @@ -45,16 +47,19 @@ class ReplicaFetcherThread(name:String, val replica = replicaMgr.getReplica(topic, partitionId).get val messageSet = partitionData.messages.asInstanceOf[ByteBufferMessageSet] - if (fetchOffset != replica.logEndOffset) - throw new RuntimeException("Offset mismatch: fetched offset = %d, log end offset = %d.".format(fetchOffset, replica.logEndOffset)) + if (fetchOffset != replica.logEndOffset.messageOffset) + throw new RuntimeException("Offset mismatch: fetched offset = %d, log end offset = %d.".format(fetchOffset, replica.logEndOffset.messageOffset)) trace("Follower %d has replica log end offset %d for partition %s. 
Received %d messages and leader hw %d" - .format(replica.brokerId, replica.logEndOffset, topicAndPartition, messageSet.sizeInBytes, partitionData.hw)) + .format(replica.brokerId, replica.logEndOffset.messageOffset, topicAndPartition, messageSet.sizeInBytes, partitionData.hw)) replica.log.get.append(messageSet, assignOffsets = false) trace("Follower %d has replica log end offset %d after appending %d bytes of messages for partition %s" - .format(replica.brokerId, replica.logEndOffset, messageSet.sizeInBytes, topicAndPartition)) - val followerHighWatermark = replica.logEndOffset.min(partitionData.hw) - replica.highWatermark = followerHighWatermark - trace("Follower %d set replica highwatermark for partition [%s,%d] to %d" + .format(replica.brokerId, replica.logEndOffset.messageOffset, messageSet.sizeInBytes, topicAndPartition)) + val followerHighWatermark = replica.logEndOffset.messageOffset.min(partitionData.hw) + // for the follower replica, we do not need to keep + // its segment base offset the physical position, + // these values will be computed upon making the leader + replica.highWatermark = new LogOffsetMetadata(followerHighWatermark) + trace("Follower %d set replica high watermark for partition [%s,%d] to %s" .format(replica.brokerId, topic, partitionId, followerHighWatermark)) } catch { case e: KafkaStorageException => @@ -80,10 +85,22 @@ class ReplicaFetcherThread(name:String, * There is a potential for a mismatch between the logs of the two replicas here. We don't fix this mismatch as of now. */ val leaderEndOffset = simpleConsumer.earliestOrLatestOffset(topicAndPartition, OffsetRequest.LatestTime, brokerConfig.brokerId) - if (leaderEndOffset < replica.logEndOffset) { + if (leaderEndOffset < replica.logEndOffset.messageOffset) { + // Prior to truncating the follower's log, ensure that doing so is not disallowed by the configuration for unclean leader election. + // This situation could only happen if the unclean election configuration for a topic changes while a replica is down. Otherwise, + // we should never encounter this situation since a non-ISR leader cannot be elected if disallowed by the broker configuration. + if (!LogConfig.fromProps(brokerConfig.props.props, AdminUtils.fetchTopicConfig(replicaMgr.zkClient, + topicAndPartition.topic)).uncleanLeaderElectionEnable) { + // Log a fatal error and shutdown the broker to ensure that data loss does not unexpectedly occur. 
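// Illustrative sketch, not part of the original patch: the "disallowed by the configuration"
// check above resolves the effective per-topic setting by overlaying any topic-level override
// stored in ZooKeeper on top of the broker defaults. Names here (zkClient, brokerProps,
// "my-topic") are placeholders.
//
//   val topicOverrides    = AdminUtils.fetchTopicConfig(zkClient, "my-topic")
//   val effectiveConfig   = LogConfig.fromProps(brokerProps, topicOverrides)
//   val truncationAllowed = effectiveConfig.uncleanLeaderElectionEnable
//
// When truncation is not allowed, the follower halts (below) instead of silently discarding
// messages the unclean leader never saw.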
+ fatal("Halting because log truncation is not allowed for topic %s,".format(topicAndPartition.topic) + + " Current leader %d's latest offset %d is less than replica %d's latest offset %d" + .format(sourceBroker.id, leaderEndOffset, brokerConfig.brokerId, replica.logEndOffset.messageOffset)) + Runtime.getRuntime.halt(1) + } + replicaMgr.logManager.truncateTo(Map(topicAndPartition -> leaderEndOffset)) - warn("Replica %d for partition %s reset its fetch offset to current leader %d's latest offset %d" - .format(brokerConfig.brokerId, topicAndPartition, sourceBroker.id, leaderEndOffset)) + warn("Replica %d for partition %s reset its fetch offset from %d to current leader %d's latest offset %d" + .format(brokerConfig.brokerId, topicAndPartition, replica.logEndOffset.messageOffset, sourceBroker.id, leaderEndOffset)) leaderEndOffset } else { /** @@ -94,8 +111,8 @@ class ReplicaFetcherThread(name:String, */ val leaderStartOffset = simpleConsumer.earliestOrLatestOffset(topicAndPartition, OffsetRequest.EarliestTime, brokerConfig.brokerId) replicaMgr.logManager.truncateFullyAndStartAt(topicAndPartition, leaderStartOffset) - warn("Replica %d for partition %s reset its fetch offset to current leader %d's start offset %d" - .format(brokerConfig.brokerId, topicAndPartition, sourceBroker.id, leaderStartOffset)) + warn("Replica %d for partition %s reset its fetch offset from %d to current leader %d's start offset %d" + .format(brokerConfig.brokerId, topicAndPartition, replica.logEndOffset.messageOffset, sourceBroker.id, leaderStartOffset)) leaderStartOffset } } diff --git a/core/src/main/scala/kafka/server/ReplicaManager.scala b/core/src/main/scala/kafka/server/ReplicaManager.scala index 21bba48affb38..e58fbb922e93b 100644 --- a/core/src/main/scala/kafka/server/ReplicaManager.scala +++ b/core/src/main/scala/kafka/server/ReplicaManager.scala @@ -16,31 +16,55 @@ */ package kafka.server -import kafka.cluster.{Broker, Partition, Replica} -import collection._ -import mutable.HashMap -import org.I0Itec.zkclient.ZkClient -import java.io.{File, IOException} -import java.util.concurrent.atomic.AtomicBoolean +import kafka.api._ +import kafka.common._ import kafka.utils._ -import kafka.log.LogManager +import kafka.cluster.{Broker, Partition, Replica} +import kafka.log.{LogAppendInfo, LogManager} import kafka.metrics.KafkaMetricsGroup -import com.yammer.metrics.core.Gauge -import java.util.concurrent.TimeUnit -import kafka.common._ -import kafka.api.{StopReplicaRequest, PartitionStateInfo, LeaderAndIsrRequest} import kafka.controller.KafkaController -import org.apache.log4j.Logger +import kafka.common.TopicAndPartition +import kafka.message.{ByteBufferMessageSet, MessageSet} +import java.util.concurrent.atomic.AtomicBoolean +import java.io.{IOException, File} +import java.util.concurrent.TimeUnit +import scala.Predef._ +import scala.collection._ +import scala.collection.mutable.HashMap +import scala.collection.Map +import scala.collection.Set + +import org.I0Itec.zkclient.ZkClient +import com.yammer.metrics.core.Gauge + +/* + * Result metadata of a log append operation on the log + */ +case class LogAppendResult(info: LogAppendInfo, error: Option[Throwable] = None) { + def errorCode = error match { + case None => ErrorMapping.NoError + case Some(e) => ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]]) + } +} + +/* + * Result metadata of a log read operation on the log + */ +case class LogReadResult(info: FetchDataInfo, hw: Long, readSize: Int, error: Option[Throwable] = None) { + def errorCode = error 
match { + case None => ErrorMapping.NoError + case Some(e) => ErrorMapping.codeFor(e.getClass.asInstanceOf[Class[Throwable]]) + } +} object ReplicaManager { - val UnknownLogEndOffset = -1L val HighWatermarkFilename = "replication-offset-checkpoint" } -class ReplicaManager(val config: KafkaConfig, - time: Time, - val zkClient: ZkClient, +class ReplicaManager(val config: KafkaConfig, + time: Time, + val zkClient: ZkClient, scheduler: Scheduler, val logManager: LogManager, val isShuttingDown: AtomicBoolean ) extends Logging with KafkaMetricsGroup { @@ -48,23 +72,23 @@ class ReplicaManager(val config: KafkaConfig, @volatile var controllerEpoch: Int = KafkaController.InitialControllerEpoch - 1 private val localBrokerId = config.brokerId private val allPartitions = new Pool[(String, Int), Partition] - private var leaderPartitions = new mutable.HashSet[Partition]() - private val leaderPartitionsLock = new Object private val replicaStateChangeLock = new Object val replicaFetcherManager = new ReplicaFetcherManager(config, this) private val highWatermarkCheckPointThreadStarted = new AtomicBoolean(false) - val highWatermarkCheckpoints = config.logDirs.map(dir => (dir, new OffsetCheckpoint(new File(dir, ReplicaManager.HighWatermarkFilename)))).toMap + val highWatermarkCheckpoints = config.logDirs.map(dir => (new File(dir).getAbsolutePath, new OffsetCheckpoint(new File(dir, ReplicaManager.HighWatermarkFilename)))).toMap private var hwThreadInitialized = false this.logIdent = "[Replica Manager on Broker " + localBrokerId + "]: " - val stateChangeLogger = Logger.getLogger(KafkaController.stateChangeLogger) + val stateChangeLogger = KafkaController.stateChangeLogger + + val delayedProducePurgatory = new DelayedOperationPurgatory[DelayedProduce](config.brokerId, config.producerPurgatoryPurgeIntervalRequests) + val delayedFetchPurgatory = new DelayedOperationPurgatory[DelayedFetch](config.brokerId, config.fetchPurgatoryPurgeIntervalRequests) + newGauge( "LeaderCount", new Gauge[Int] { def value = { - leaderPartitionsLock synchronized { - leaderPartitions.size - } + getLeaderPartitions().size } } ) @@ -84,9 +108,7 @@ class ReplicaManager(val config: KafkaConfig, val isrShrinkRate = newMeter("IsrShrinksPerSec", "shrinks", TimeUnit.SECONDS) def underReplicatedPartitionCount(): Int = { - leaderPartitionsLock synchronized { - leaderPartitions.count(_.isUnderReplicated) - } + getLeaderPartitions().count(_.isUnderReplicated) } def startHighWaterMarksCheckPointThread() = { @@ -95,17 +117,27 @@ class ReplicaManager(val config: KafkaConfig, } /** - * This function is only used in two places: in Partition.updateISR() and KafkaApis.handleProducerRequest(). - * In the former case, the partition should have been created, in the latter case, return -1 will put the request into purgatory + * Try to complete some delayed produce requests with the request key; + * this can be triggered when: + * + * 1. The partition HW has changed (for acks = -1) + * 2. 
A follower replica's fetch operation is received (for acks > 1) */ - def getReplicationFactorForPartition(topic: String, partitionId: Int) = { - val partitionOpt = getPartition(topic, partitionId) - partitionOpt match { - case Some(partition) => - partition.replicationFactor - case None => - -1 - } + def tryCompleteDelayedProduce(key: DelayedOperationKey) { + val completed = delayedProducePurgatory.checkAndComplete(key) + debug("Request key %s unblocked %d producer requests.".format(key.keyLabel, completed)) + } + + /** + * Try to complete some delayed fetch requests with the request key; + * this can be triggered when: + * + * 1. The partition HW has changed (for regular fetch) + * 2. A new message set is appended to the local log (for follower fetch) + */ + def tryCompleteDelayedFetch(key: DelayedOperationKey) { + val completed = delayedFetchPurgatory.checkAndComplete(key) + debug("Request key %s unblocked %d fetch requests.".format(key.keyLabel, completed)) } def startup() { @@ -119,15 +151,21 @@ class ReplicaManager(val config: KafkaConfig, val errorCode = ErrorMapping.NoError getPartition(topic, partitionId) match { case Some(partition) => - leaderPartitionsLock synchronized { - leaderPartitions -= partition - } if(deletePartition) { val removedPartition = allPartitions.remove((topic, partitionId)) if (removedPartition != null) removedPartition.delete() // this will delete the local log } - case None => //do nothing if replica no longer exists. This can happen during delete topic retries + case None => + // Delete log and corresponding folders in case replica manager doesn't hold them anymore. + // This could happen when topic is being deleted while broker is down and recovers. + if(deletePartition) { + val topicAndPartition = TopicAndPartition(topic, partitionId) + + if(logManager.getLog(topicAndPartition).isDefined) { + logManager.deleteLog(topicAndPartition) + } + } stateChangeLogger.trace("Broker %d ignoring stop replica (delete=%s) for partition [%s,%d] as replica doesn't exist on broker" .format(localBrokerId, deletePartition, topic, partitionId)) } @@ -157,10 +195,10 @@ class ReplicaManager(val config: KafkaConfig, } } - def getOrCreatePartition(topic: String, partitionId: Int, replicationFactor: Int): Partition = { + def getOrCreatePartition(topic: String, partitionId: Int): Partition = { var partition = allPartitions.get((topic, partitionId)) if (partition == null) { - allPartitions.putIfNotExists((topic, partitionId), new Partition(topic, partitionId, replicationFactor, time, this)) + allPartitions.putIfNotExists((topic, partitionId), new Partition(topic, partitionId, time, this)) partition = allPartitions.get((topic, partitionId)) } partition @@ -205,7 +243,236 @@ class ReplicaManager(val config: KafkaConfig, } } - def becomeLeaderOrFollower(leaderAndISRRequest: LeaderAndIsrRequest): (collection.Map[(String, Int), Short], Short) = { + /** + * Append messages to leader replicas of the partition, and wait for them to be replicated to other replicas; + * the callback function will be triggered either when timeout or the required acks are satisfied + */ + def appendMessages(timeout: Long, + requiredAcks: Short, + internalTopicsAllowed: Boolean, + messagesPerPartition: Map[TopicAndPartition, MessageSet], + responseCallback: Map[TopicAndPartition, ProducerResponseStatus] => Unit) { + + val sTime = SystemTime.milliseconds + val localProduceResults = appendToLocalLog(internalTopicsAllowed, messagesPerPartition, requiredAcks) + debug("Produce to local log in %d 
ms".format(SystemTime.milliseconds - sTime)) + + val produceStatus = localProduceResults.map{ case (topicAndPartition, result) => + topicAndPartition -> + ProducePartitionStatus( + result.info.lastOffset + 1, // required offset + ProducerResponseStatus(result.errorCode, result.info.firstOffset)) // response status + } + + if(requiredAcks == 0 || + requiredAcks == 1 || + messagesPerPartition.size <= 0 || + localProduceResults.values.count(_.error.isDefined) == messagesPerPartition.size) { + // in case of the following we can respond immediately: + // + // 1. required acks = 0 or 1 + // 2. there is no data to append + // 3. all partition appends have failed + val produceResponseStatus = produceStatus.mapValues(status => status.responseStatus) + responseCallback(produceResponseStatus) + } else { + // create delayed produce operation + val produceMetadata = ProduceMetadata(requiredAcks, produceStatus) + val delayedProduce = new DelayedProduce(timeout, produceMetadata, this, responseCallback) + + // create a list of (topic, partition) pairs to use as keys for this delayed produce operation + val producerRequestKeys = messagesPerPartition.keys.map(new TopicPartitionOperationKey(_)).toSeq + + // try to complete the request immediately, otherwise put it into the purgatory + // this is because while the delayed produce operation is being created, new + // requests may arrive and hence make this operation completable. + delayedProducePurgatory.tryCompleteElseWatch(delayedProduce, producerRequestKeys) + } + } + + /** + * Append the messages to the local replica logs + */ + private def appendToLocalLog(internalTopicsAllowed: Boolean, + messagesPerPartition: Map[TopicAndPartition, MessageSet], + requiredAcks: Short): Map[TopicAndPartition, LogAppendResult] = { + trace("Append [%s] to local log ".format(messagesPerPartition)) + messagesPerPartition.map { case (topicAndPartition, messages) => + // reject appending to internal topics if it is not allowed + if (Topic.InternalTopics.contains(topicAndPartition.topic) && !internalTopicsAllowed) { + + (topicAndPartition, LogAppendResult( + LogAppendInfo.UnknownLogAppendInfo, + Some(new InvalidTopicException("Cannot append to internal topic %s".format(topicAndPartition.topic))))) + } else { + try { + val partitionOpt = getPartition(topicAndPartition.topic, topicAndPartition.partition) + val info = partitionOpt match { + case Some(partition) => + partition.appendMessagesToLeader(messages.asInstanceOf[ByteBufferMessageSet], requiredAcks) + case None => throw new UnknownTopicOrPartitionException("Partition %s doesn't exist on %d" + .format(topicAndPartition, localBrokerId)) + } + + val numAppendedMessages = + if (info.firstOffset == -1L || info.lastOffset == -1L) + 0 + else + info.lastOffset - info.firstOffset + 1 + + // update stats for successfully appended bytes and messages as bytesInRate and messageInRate + BrokerTopicStats.getBrokerTopicStats(topicAndPartition.topic).bytesInRate.mark(messages.sizeInBytes) + BrokerTopicStats.getBrokerAllTopicsStats.bytesInRate.mark(messages.sizeInBytes) + BrokerTopicStats.getBrokerTopicStats(topicAndPartition.topic).messagesInRate.mark(numAppendedMessages) + BrokerTopicStats.getBrokerAllTopicsStats.messagesInRate.mark(numAppendedMessages) + + trace("%d bytes written to log %s-%d beginning at offset %d and ending at offset %d" + .format(messages.size, topicAndPartition.topic, topicAndPartition.partition, info.firstOffset, info.lastOffset)) + (topicAndPartition, LogAppendResult(info)) + } catch { + // NOTE: Failed produce 
requests metric is not incremented for known exceptions + // it is supposed to indicate un-expected failures of a broker in handling a produce request + case e: KafkaStorageException => + fatal("Halting due to unrecoverable I/O error while handling produce request: ", e) + Runtime.getRuntime.halt(1) + (topicAndPartition, null) + case utpe: UnknownTopicOrPartitionException => + (topicAndPartition, LogAppendResult(LogAppendInfo.UnknownLogAppendInfo, Some(utpe))) + case nle: NotLeaderForPartitionException => + (topicAndPartition, LogAppendResult(LogAppendInfo.UnknownLogAppendInfo, Some(nle))) + case e: Throwable => + BrokerTopicStats.getBrokerTopicStats(topicAndPartition.topic).failedProduceRequestRate.mark() + BrokerTopicStats.getBrokerAllTopicsStats.failedProduceRequestRate.mark() + error("Error processing append operation on partition %s".format(topicAndPartition), e) + (topicAndPartition, LogAppendResult(LogAppendInfo.UnknownLogAppendInfo, Some(e))) + } + } + } + } + + /** + * Fetch messages from the leader replica, and wait until enough data can be fetched and return; + * the callback function will be triggered either when timeout or required fetch info is satisfied + */ + def fetchMessages(timeout: Long, + replicaId: Int, + fetchMinBytes: Int, + fetchInfo: Map[TopicAndPartition, PartitionFetchInfo], + responseCallback: Map[TopicAndPartition, FetchResponsePartitionData] => Unit) { + + val fetchOnlyFromLeader: Boolean = replicaId != Request.DebuggingConsumerId + val fetchOnlyCommitted: Boolean = ! Request.isValidBrokerId(replicaId) + + // read from local logs + val logReadResults = readFromLocalLog(fetchOnlyFromLeader, fetchOnlyCommitted, fetchInfo) + + // if the fetch comes from the follower, + // update its corresponding log end offset + if(Request.isValidBrokerId(replicaId)) + updateFollowerLEOs(replicaId, logReadResults.mapValues(_.info.fetchOffset)) + + // check if this fetch request can be satisfied right away + val bytesReadable = logReadResults.values.map(_.info.messageSet.sizeInBytes).sum + val errorReadingData = logReadResults.values.foldLeft(false) ((errorIncurred, readResult) => + errorIncurred || (readResult.errorCode != ErrorMapping.NoError)) + + // respond immediately if 1) fetch request does not want to wait + // 2) fetch request does not require any data + // 3) has enough data to respond + // 4) some error happens while reading data + if(timeout <= 0 || fetchInfo.size <= 0 || bytesReadable >= fetchMinBytes || errorReadingData) { + val fetchPartitionData = logReadResults.mapValues(result => + FetchResponsePartitionData(result.errorCode, result.hw, result.info.messageSet)) + responseCallback(fetchPartitionData) + } else { + // construct the fetch results from the read results + val fetchPartitionStatus = logReadResults.map { case (topicAndPartition, result) => + (topicAndPartition, FetchPartitionStatus(result.info.fetchOffset, fetchInfo.get(topicAndPartition).get)) + } + val fetchMetadata = FetchMetadata(fetchMinBytes, fetchOnlyFromLeader, fetchOnlyCommitted, fetchPartitionStatus) + val delayedFetch = new DelayedFetch(timeout, fetchMetadata, this, responseCallback) + + // create a list of (topic, partition) pairs to use as keys for this delayed fetch operation + val delayedFetchKeys = fetchPartitionStatus.keys.map(new TopicPartitionOperationKey(_)).toSeq + + // try to complete the request immediately, otherwise put it into the purgatory; + // this is because while the delayed fetch operation is being created, new requests + // may arrive and hence make this operation 
completable. + delayedFetchPurgatory.tryCompleteElseWatch(delayedFetch, delayedFetchKeys) + } + } + + /** + * Read from a single topic/partition at the given offset upto maxSize bytes + */ + def readFromLocalLog(fetchOnlyFromLeader: Boolean, + readOnlyCommitted: Boolean, + readPartitionInfo: Map[TopicAndPartition, PartitionFetchInfo]): Map[TopicAndPartition, LogReadResult] = { + + readPartitionInfo.map { case (TopicAndPartition(topic, partition), PartitionFetchInfo(offset, fetchSize)) => + val partitionDataAndOffsetInfo = + try { + trace("Fetching log segment for topic %s, partition %d, offset %d, size %d".format(topic, partition, offset, fetchSize)) + + // decide whether to only fetch from leader + val localReplica = if (fetchOnlyFromLeader) + getLeaderReplicaIfLocal(topic, partition) + else + getReplicaOrException(topic, partition) + + // decide whether to only fetch committed data (i.e. messages below high watermark) + val maxOffsetOpt = if (readOnlyCommitted) + Some(localReplica.highWatermark.messageOffset) + else + None + + // read on log + val logReadInfo = localReplica.log match { + case Some(log) => + log.read(offset, fetchSize, maxOffsetOpt) + case None => + error("Leader for partition [%s,%d] does not have a local log".format(topic, partition)) + FetchDataInfo(LogOffsetMetadata.UnknownOffsetMetadata, MessageSet.Empty) + } + + LogReadResult(logReadInfo, localReplica.highWatermark.messageOffset, fetchSize, None) + } catch { + // NOTE: Failed fetch requests metric is not incremented for known exceptions since it + // is supposed to indicate un-expected failure of a broker in handling a fetch request + case utpe: UnknownTopicOrPartitionException => + LogReadResult(FetchDataInfo(LogOffsetMetadata.UnknownOffsetMetadata, MessageSet.Empty), -1L, fetchSize, Some(utpe)) + case nle: NotLeaderForPartitionException => + LogReadResult(FetchDataInfo(LogOffsetMetadata.UnknownOffsetMetadata, MessageSet.Empty), -1L, fetchSize, Some(nle)) + case rnae: ReplicaNotAvailableException => + LogReadResult(FetchDataInfo(LogOffsetMetadata.UnknownOffsetMetadata, MessageSet.Empty), -1L, fetchSize, Some(rnae)) + case e: Throwable => + BrokerTopicStats.getBrokerTopicStats(topic).failedFetchRequestRate.mark() + BrokerTopicStats.getBrokerAllTopicsStats().failedFetchRequestRate.mark() + error("Error processing fetch operation on partition [%s,%d] offset %d".format(topic, partition, offset)) + LogReadResult(FetchDataInfo(LogOffsetMetadata.UnknownOffsetMetadata, MessageSet.Empty), -1L, fetchSize, Some(e)) + } + (TopicAndPartition(topic, partition), partitionDataAndOffsetInfo) + } + } + + def maybeUpdateMetadataCache(updateMetadataRequest: UpdateMetadataRequest, metadataCache: MetadataCache) { + replicaStateChangeLock synchronized { + if(updateMetadataRequest.controllerEpoch < controllerEpoch) { + val stateControllerEpochErrorMessage = ("Broker %d received update metadata request with correlation id %d from an " + + "old controller %d with epoch %d. 
Latest known controller epoch is %d").format(localBrokerId, + updateMetadataRequest.correlationId, updateMetadataRequest.controllerId, updateMetadataRequest.controllerEpoch, + controllerEpoch) + stateChangeLogger.warn(stateControllerEpochErrorMessage) + throw new ControllerMovedException(stateControllerEpochErrorMessage) + } else { + metadataCache.updateCache(updateMetadataRequest, localBrokerId, stateChangeLogger) + controllerEpoch = updateMetadataRequest.controllerEpoch + } + } + } + + def becomeLeaderOrFollower(leaderAndISRRequest: LeaderAndIsrRequest, + offsetManager: OffsetManager): (collection.Map[(String, Int), Short], Short) = { leaderAndISRRequest.partitionStateInfos.foreach { case ((topic, partition), stateInfo) => stateChangeLogger.trace("Broker %d received LeaderAndIsr request %s correlation id %d from controller %d epoch %d for partition [%s,%d]" .format(localBrokerId, stateInfo, leaderAndISRRequest.correlationId, @@ -215,9 +482,9 @@ class ReplicaManager(val config: KafkaConfig, val responseMap = new collection.mutable.HashMap[(String, Int), Short] if(leaderAndISRRequest.controllerEpoch < controllerEpoch) { leaderAndISRRequest.partitionStateInfos.foreach { case ((topic, partition), stateInfo) => - stateChangeLogger.warn(("Broker %d received LeaderAndIsr request correlation id %d with an old controller epoch %d." + - " Latest known controller epoch is %d").format(localBrokerId, leaderAndISRRequest.correlationId, - leaderAndISRRequest.controllerEpoch, controllerEpoch)) + stateChangeLogger.warn(("Broker %d ignoring LeaderAndIsr request from controller %d with correlation id %d since " + + "its controller epoch %d is old. Latest known controller epoch is %d").format(localBrokerId, leaderAndISRRequest.controllerId, + leaderAndISRRequest.correlationId, leaderAndISRRequest.controllerEpoch, controllerEpoch)) } (responseMap, ErrorMapping.StaleControllerEpochCode) } else { @@ -228,7 +495,7 @@ class ReplicaManager(val config: KafkaConfig, // First check partition's leader epoch val partitionState = new HashMap[Partition, PartitionStateInfo]() leaderAndISRRequest.partitionStateInfos.foreach{ case ((topic, partitionId), partitionStateInfo) => - val partition = getOrCreatePartition(topic, partitionId, partitionStateInfo.replicationFactor) + val partition = getOrCreatePartition(topic, partitionId) val partitionLeaderEpoch = partition.getLeaderEpoch() // If the leader epoch is valid record the epoch of the controller that made the leadership decision. 
// This is useful while updating the isr to maintain the decision maker controller's epoch in the zookeeper path @@ -236,17 +503,17 @@ class ReplicaManager(val config: KafkaConfig, if(partitionStateInfo.allReplicas.contains(config.brokerId)) partitionState.put(partition, partitionStateInfo) else { - stateChangeLogger.warn(("Broker %d ignoring LeaderAndIsr request with correlation id %d from " + - "controller %d epoch %d as broker is not in assigned replica list %s for partition [%s,%d]") - .format(localBrokerId, correlationId, controllerId, leaderAndISRRequest.controllerEpoch, - partitionStateInfo.allReplicas.mkString(","), topic, partition.partitionId)) + stateChangeLogger.warn(("Broker %d ignoring LeaderAndIsr request from controller %d with correlation id %d " + + "epoch %d for partition [%s,%d] as itself is not in assigned replica list %s") + .format(localBrokerId, controllerId, correlationId, leaderAndISRRequest.controllerEpoch, + topic, partition.partitionId, partitionStateInfo.allReplicas.mkString(","))) } } else { // Otherwise record the error code in response - stateChangeLogger.warn(("Broker %d received invalid LeaderAndIsr request with correlation id %d from " + - "controller %d epoch %d with an older leader epoch %d for partition [%s,%d], current leader epoch is %d") - .format(localBrokerId, correlationId, controllerId, leaderAndISRRequest.controllerEpoch, - partitionStateInfo.leaderIsrAndControllerEpoch.leaderAndIsr.leaderEpoch, topic, partition.partitionId, partitionLeaderEpoch)) + stateChangeLogger.warn(("Broker %d ignoring LeaderAndIsr request from controller %d with correlation id %d " + + "epoch %d for partition [%s,%d] since its associated leader epoch %d is old. Current leader epoch is %d") + .format(localBrokerId, controllerId, correlationId, leaderAndISRRequest.controllerEpoch, + topic, partition.partitionId, partitionStateInfo.leaderIsrAndControllerEpoch.leaderAndIsr.leaderEpoch, partitionLeaderEpoch)) responseMap.put((topic, partitionId), ErrorMapping.StaleLeaderEpochCode) } } @@ -255,8 +522,10 @@ class ReplicaManager(val config: KafkaConfig, .filter{ case (partition, partitionStateInfo) => partitionStateInfo.leaderIsrAndControllerEpoch.leaderAndIsr.leader == config.brokerId} val partitionsToBeFollower = (partitionState -- partitionsTobeLeader.keys) - if (!partitionsTobeLeader.isEmpty) makeLeaders(controllerId, controllerEpoch, partitionsTobeLeader, leaderAndISRRequest.correlationId, responseMap) - if (!partitionsToBeFollower.isEmpty) makeFollowers(controllerId, controllerEpoch, partitionsToBeFollower, leaderAndISRRequest.leaders, leaderAndISRRequest.correlationId, responseMap) + if (!partitionsTobeLeader.isEmpty) + makeLeaders(controllerId, controllerEpoch, partitionsTobeLeader, leaderAndISRRequest.correlationId, responseMap, offsetManager) + if (!partitionsToBeFollower.isEmpty) + makeFollowers(controllerId, controllerEpoch, partitionsToBeFollower, leaderAndISRRequest.leaders, leaderAndISRRequest.correlationId, responseMap, offsetManager) // we initialize highwatermark thread after the first leaderisrrequest. This ensures that all the partitions // have been completely populated before starting the checkpointing there by avoiding weird race conditions @@ -271,7 +540,7 @@ class ReplicaManager(val config: KafkaConfig, } /* - * Make the current broker to become follower for a given set of partitions by: + * Make the current broker to become leader for a given set of partitions by: * * 1. Stop fetchers for these partitions * 2. 
Update the partition metadata in cache @@ -283,7 +552,8 @@ class ReplicaManager(val config: KafkaConfig, */ private def makeLeaders(controllerId: Int, epoch: Int, partitionState: Map[Partition, PartitionStateInfo], - correlationId: Int, responseMap: mutable.Map[(String, Int), Short]) = { + correlationId: Int, responseMap: mutable.Map[(String, Int), Short], + offsetManager: OffsetManager) = { partitionState.foreach(state => stateChangeLogger.trace(("Broker %d handling LeaderAndIsr request correlationId %d from controller %d epoch %d " + "starting the become-leader transition for partition %s") @@ -302,12 +572,8 @@ class ReplicaManager(val config: KafkaConfig, } // Update the partition information to be the leader partitionState.foreach{ case (partition, partitionStateInfo) => - partition.makeLeader(controllerId, partitionStateInfo, correlationId)} + partition.makeLeader(controllerId, partitionStateInfo, correlationId, offsetManager)} - // Finally add these partitions to the list of partitions for which the leader is the current broker - leaderPartitionsLock synchronized { - leaderPartitions ++= partitionState.keySet - } } catch { case e: Throwable => partitionState.foreach { state => @@ -344,61 +610,82 @@ class ReplicaManager(val config: KafkaConfig, * the error message will be set on each partition since we do not know which partition caused it */ private def makeFollowers(controllerId: Int, epoch: Int, partitionState: Map[Partition, PartitionStateInfo], - leaders: Set[Broker], correlationId: Int, responseMap: mutable.Map[(String, Int), Short]) { - partitionState.foreach(state => + leaders: Set[Broker], correlationId: Int, responseMap: mutable.Map[(String, Int), Short], + offsetManager: OffsetManager) { + partitionState.foreach { state => stateChangeLogger.trace(("Broker %d handling LeaderAndIsr request correlationId %d from controller %d epoch %d " + "starting the become-follower transition for partition %s") - .format(localBrokerId, correlationId, controllerId, epoch, TopicAndPartition(state._1.topic, state._1.partitionId)))) + .format(localBrokerId, correlationId, controllerId, epoch, TopicAndPartition(state._1.topic, state._1.partitionId))) + } for (partition <- partitionState.keys) responseMap.put((partition.topic, partition.partitionId), ErrorMapping.NoError) try { - leaderPartitionsLock synchronized { - leaderPartitions --= partitionState.keySet - } - partitionState.foreach{ case (partition, leaderIsrAndControllerEpoch) => - partition.makeFollower(controllerId, leaderIsrAndControllerEpoch, leaders, correlationId)} + var partitionsToMakeFollower: Set[Partition] = Set() - replicaFetcherManager.removeFetcherForPartitions(partitionState.keySet.map(new TopicAndPartition(_))) - partitionState.foreach { state => + // TODO: Delete leaders from LeaderAndIsrRequest in 0.8.1 + partitionState.foreach{ case (partition, partitionStateInfo) => + val leaderIsrAndControllerEpoch = partitionStateInfo.leaderIsrAndControllerEpoch + val newLeaderBrokerId = leaderIsrAndControllerEpoch.leaderAndIsr.leader + leaders.find(_.id == newLeaderBrokerId) match { + // Only change partition state when the leader is available + case Some(leaderBroker) => + if (partition.makeFollower(controllerId, partitionStateInfo, correlationId, offsetManager)) + partitionsToMakeFollower += partition + else + stateChangeLogger.info(("Broker %d skipped the become-follower state change after marking its partition as follower with correlation id %d from " + + "controller %d epoch %d for partition [%s,%d] since the new leader %d is 
the same as the old leader") + .format(localBrokerId, correlationId, controllerId, leaderIsrAndControllerEpoch.controllerEpoch, + partition.topic, partition.partitionId, newLeaderBrokerId)) + case None => + // The leader broker should always be present in the leaderAndIsrRequest. + // If not, we should record the error message and abort the transition process for this partition + stateChangeLogger.error(("Broker %d received LeaderAndIsrRequest with correlation id %d from controller" + + " %d epoch %d for partition [%s,%d] but cannot become follower since the new leader %d is unavailable.") + .format(localBrokerId, correlationId, controllerId, leaderIsrAndControllerEpoch.controllerEpoch, + partition.topic, partition.partitionId, newLeaderBrokerId)) + // Create the local replica even if the leader is unavailable. This is required to ensure that we include + // the partition's high watermark in the checkpoint file (see KAFKA-1647) + partition.getOrCreateReplica() + } + } + + replicaFetcherManager.removeFetcherForPartitions(partitionsToMakeFollower.map(new TopicAndPartition(_))) + partitionsToMakeFollower.foreach { partition => stateChangeLogger.trace(("Broker %d stopped fetchers as part of become-follower request from controller " + "%d epoch %d with correlation id %d for partition %s") - .format(localBrokerId, controllerId, epoch, correlationId, TopicAndPartition(state._1.topic, state._1.partitionId))) + .format(localBrokerId, controllerId, epoch, correlationId, TopicAndPartition(partition.topic, partition.partitionId))) } - logManager.truncateTo(partitionState.map{ case(partition, leaderISRAndControllerEpoch) => - new TopicAndPartition(partition) -> partition.getOrCreateReplica().highWatermark - }) - partitionState.foreach { state => - stateChangeLogger.trace(("Broker %d truncated logs and checkpointed recovery boundaries for partition %s as part of " + + logManager.truncateTo(partitionsToMakeFollower.map(partition => (new TopicAndPartition(partition), partition.getOrCreateReplica().highWatermark.messageOffset)).toMap) + + partitionsToMakeFollower.foreach { partition => + stateChangeLogger.trace(("Broker %d truncated logs and checkpointed recovery boundaries for partition [%s,%d] as part of " + "become-follower request with correlation id %d from controller %d epoch %d").format(localBrokerId, - TopicAndPartition(state._1.topic, state._1.partitionId), correlationId, controllerId, epoch)) + partition.topic, partition.partitionId, correlationId, controllerId, epoch)) } - if (!isShuttingDown.get()) { - val partitionAndOffsets = mutable.Map[TopicAndPartition, BrokerAndInitialOffset]() - partitionState.foreach { - case (partition, partitionStateInfo) => - val leader = partitionStateInfo.leaderIsrAndControllerEpoch.leaderAndIsr.leader - leaders.find(_.id == leader) match { - case Some(leaderBroker) => - partitionAndOffsets.put(new TopicAndPartition(partition), - BrokerAndInitialOffset(leaderBroker, partition.getReplica().get.logEndOffset)) - case None => - stateChangeLogger.trace(("Broker %d ignored the become-follower state change with correlation id %d " + - "controller %d epoch %d for partition %s since the designated leader %d " + - "cannot be found in live or shutting down brokers %s").format(localBrokerId, - correlationId, controllerId, epoch, partition, leader, leaders.mkString(","))) - } + + if (isShuttingDown.get()) { + partitionsToMakeFollower.foreach { partition => + stateChangeLogger.trace(("Broker %d skipped the adding-fetcher step of the become-follower state change with correlation 
id %d from " + + "controller %d epoch %d for partition [%s,%d] since it is shutting down").format(localBrokerId, correlationId, + controllerId, epoch, partition.topic, partition.partitionId)) } - replicaFetcherManager.addFetcherForPartitions(partitionAndOffsets) } else { - partitionState.foreach { state => - stateChangeLogger.trace(("Broker %d ignored the become-follower state change with correlation id %d from " + - "controller %d epoch %d for partition %s since it is shutting down").format(localBrokerId, correlationId, - controllerId, epoch, TopicAndPartition(state._1.topic, state._1.partitionId))) + // we do not need to check if the leader exists again since this has been done at the beginning of this process + val partitionsToMakeFollowerWithLeaderAndOffset = partitionsToMakeFollower.map(partition => + new TopicAndPartition(partition) -> BrokerAndInitialOffset( + leaders.find(_.id == partition.leaderReplicaIdOpt.get).get, + partition.getReplica().get.logEndOffset.messageOffset)).toMap + replicaFetcherManager.addFetcherForPartitions(partitionsToMakeFollowerWithLeaderAndOffset) + + partitionsToMakeFollower.foreach { partition => + stateChangeLogger.trace(("Broker %d started fetcher to new leader as part of become-follower request from controller " + + "%d epoch %d with correlation id %d for partition [%s,%d]") + .format(localBrokerId, controllerId, epoch, correlationId, partition.topic, partition.partitionId)) } } } catch { @@ -419,30 +706,35 @@ class ReplicaManager(val config: KafkaConfig, private def maybeShrinkIsr(): Unit = { trace("Evaluating ISR list of partitions to see which replicas can be removed from the ISR") - var curLeaderPartitions: List[Partition] = null - leaderPartitionsLock synchronized { - curLeaderPartitions = leaderPartitions.toList - } - curLeaderPartitions.foreach(partition => partition.maybeShrinkIsr(config.replicaLagTimeMaxMs, config.replicaLagMaxMessages)) + allPartitions.values.foreach(partition => partition.maybeShrinkIsr(config.replicaLagTimeMaxMs, config.replicaLagMaxMessages)) } - def recordFollowerPosition(topic: String, partitionId: Int, replicaId: Int, offset: Long) = { - val partitionOpt = getPartition(topic, partitionId) - if(partitionOpt.isDefined) { - partitionOpt.get.updateLeaderHWAndMaybeExpandIsr(replicaId, offset) - } else { - warn("While recording the follower position, the partition [%s,%d] hasn't been created, skip updating leader HW".format(topic, partitionId)) + private def updateFollowerLEOs(replicaId: Int, offsets: Map[TopicAndPartition, LogOffsetMetadata]) { + debug("Recording follower broker %d log end offsets: %s ".format(replicaId, offsets)) + offsets.foreach { case (topicAndPartition, offset) => + getPartition(topicAndPartition.topic, topicAndPartition.partition) match { + case Some(partition) => + partition.updateReplicaLEO(replicaId, offset) + + // for producer requests with ack > 1, we need to check + // if they can be unblocked after some follower's log end offsets have moved + tryCompleteDelayedProduce(new TopicPartitionOperationKey(topicAndPartition)) + case None => + warn("While recording the replica LEO, the partition %s hasn't been created.".format(topicAndPartition)) + } } } - /** - * Flushes the highwatermark value for all partitions to the highwatermark file - */ + private def getLeaderPartitions() : List[Partition] = { + allPartitions.values.filter(_.leaderReplicaIfLocal().isDefined).toList + } + + // Flushes the highwatermark value for all partitions to the highwatermark file def checkpointHighWatermarks() { val replicas 
= allPartitions.values.map(_.getReplica(config.brokerId)).collect{case Some(replica) => replica} - val replicasByDir = replicas.filter(_.log.isDefined).groupBy(_.log.get.dir.getParent) + val replicasByDir = replicas.filter(_.log.isDefined).groupBy(_.log.get.dir.getParentFile.getAbsolutePath) for((dir, reps) <- replicasByDir) { - val hwms = reps.map(r => (new TopicAndPartition(r) -> r.highWatermark)).toMap + val hwms = reps.map(r => (new TopicAndPartition(r) -> r.highWatermark.messageOffset)).toMap try { highWatermarkCheckpoints(dir).write(hwms) } catch { @@ -453,10 +745,14 @@ class ReplicaManager(val config: KafkaConfig, } } - def shutdown() { - info("Shut down") + // High watermark do not need to be checkpointed only when under unit tests + def shutdown(checkpointHW: Boolean = true) { + info("Shutting down") replicaFetcherManager.shutdown() - checkpointHighWatermarks() - info("Shutted down completely") + delayedFetchPurgatory.shutdown() + delayedProducePurgatory.shutdown() + if (checkpointHW) + checkpointHighWatermarks() + info("Shut down completely") } } diff --git a/core/src/main/scala/kafka/server/RequestPurgatory.scala b/core/src/main/scala/kafka/server/RequestPurgatory.scala deleted file mode 100644 index c064c5c4cf119..0000000000000 --- a/core/src/main/scala/kafka/server/RequestPurgatory.scala +++ /dev/null @@ -1,285 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package kafka.server - -import scala.collection._ -import java.util.concurrent._ -import java.util.concurrent.atomic._ -import kafka.network._ -import kafka.utils._ -import kafka.metrics.KafkaMetricsGroup -import java.util -import com.yammer.metrics.core.Gauge - - -/** - * A request whose processing needs to be delayed for at most the given delayMs - * The associated keys are used for bookeeping, and represent the "trigger" that causes this request to check if it is satisfied, - * for example a key could be a (topic, partition) pair. - */ -class DelayedRequest(val keys: Seq[Any], val request: RequestChannel.Request, delayMs: Long) extends DelayedItem[RequestChannel.Request](request, delayMs) { - val satisfied = new AtomicBoolean(false) -} - -/** - * A helper class for dealing with asynchronous requests with a timeout. A DelayedRequest has a request to delay - * and also a list of keys that can trigger the action. Implementations can add customized logic to control what it means for a given - * request to be satisfied. 
For example it could be that we are waiting for user-specified number of acks on a given (topic, partition) - * to be able to respond to a request or it could be that we are waiting for a given number of bytes to accumulate on a given request - * to be able to respond to that request (in the simple case we might wait for at least one byte to avoid busy waiting). - * - * For us the key is generally a (topic, partition) pair. - * By calling - * watch(delayedRequest) - * we will add triggers for each of the given keys. It is up to the user to then call - * val satisfied = update(key, request) - * when a request relevant to the given key occurs. This triggers bookeeping logic and returns back any requests satisfied by this - * new request. - * - * An implementation provides extends two helper functions - * def checkSatisfied(request: R, delayed: T): Boolean - * this function returns true if the given request (in combination with whatever previous requests have happened) satisfies the delayed - * request delayed. This method will likely also need to do whatever bookkeeping is necessary. - * - * The second function is - * def expire(delayed: T) - * this function handles delayed requests that have hit their time limit without being satisfied. - * - */ -abstract class RequestPurgatory[T <: DelayedRequest, R](brokerId: Int = 0, purgeInterval: Int = 10000) - extends Logging with KafkaMetricsGroup { - - /* a list of requests watching each key */ - private val watchersForKey = new Pool[Any, Watchers](Some((key: Any) => new Watchers)) - - private val requestCounter = new AtomicInteger(0) - - newGauge( - "PurgatorySize", - new Gauge[Int] { - def value = watchersForKey.values.map(_.numRequests).sum + expiredRequestReaper.numRequests - } - ) - - newGauge( - "NumDelayedRequests", - new Gauge[Int] { - def value = expiredRequestReaper.unsatisfied.get() - } - ) - - /* background thread expiring requests that have been waiting too long */ - private val expiredRequestReaper = new ExpiredRequestReaper - private val expirationThread = Utils.newThread(name="request-expiration-task", runnable=expiredRequestReaper, daemon=false) - expirationThread.start() - - /** - * Add a new delayed request watching the contained keys - */ - def watch(delayedRequest: T) { - requestCounter.getAndIncrement() - - for(key <- delayedRequest.keys) { - var lst = watchersFor(key) - lst.add(delayedRequest) - } - expiredRequestReaper.enqueue(delayedRequest) - } - - /** - * Update any watchers and return a list of newly satisfied requests. - */ - def update(key: Any, request: R): Seq[T] = { - val w = watchersForKey.get(key) - if(w == null) - Seq.empty - else - w.collectSatisfiedRequests(request) - } - - private def watchersFor(key: Any) = watchersForKey.getAndMaybePut(key) - - /** - * Check if this request satisfied this delayed request - */ - protected def checkSatisfied(request: R, delayed: T): Boolean - - /** - * Handle an expired delayed request - */ - protected def expire(delayed: T) - - /** - * Shutdown the expirey thread - */ - def shutdown() { - expiredRequestReaper.shutdown() - } - - /** - * A linked list of DelayedRequests watching some key with some associated - * bookkeeping logic. 
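The scaladoc on the removed RequestPurgatory above is a prose description of the mechanism it implemented: a DelayedRequest is watched under a set of keys (typically (topic, partition) pairs), a later request arriving on one of those keys may satisfy it via checkSatisfied, and a background reaper expires whatever reaches its deadline unsatisfied. A compact, JDK-only sketch of that pattern, heavily simplified and with illustrative names (this is not the purgatory API that replaces the deleted class in this patch):

    import java.util.concurrent.{DelayQueue, Delayed, TimeUnit}
    import java.util.concurrent.atomic.AtomicBoolean
    import scala.collection.mutable

    // A delayed operation that can be completed either by new data or by timeout.
    abstract class SimpleDelayedOp(delayMs: Long) extends Delayed {
      private val dueTimeMs = System.currentTimeMillis + delayMs
      val completed = new AtomicBoolean(false)
      def getDelay(unit: TimeUnit): Long =
        unit.convert(dueTimeMs - System.currentTimeMillis, TimeUnit.MILLISECONDS)
      def compareTo(other: Delayed): Int =
        java.lang.Long.compare(getDelay(TimeUnit.MILLISECONDS), other.getDelay(TimeUnit.MILLISECONDS))
      def isSatisfied: Boolean   // has the wait condition been met?
      def onComplete(): Unit     // send the response
    }

    class SimplePurgatory[T <: SimpleDelayedOp] {
      private val watchers = mutable.Map[Any, mutable.ListBuffer[T]]()
      private val timeouts = new DelayQueue[T]()

      // Register an operation under the keys (e.g. (topic, partition)) that may complete it.
      def watch(op: T, keys: Seq[Any]): Unit = synchronized {
        keys.foreach(k => watchers.getOrElseUpdate(k, mutable.ListBuffer.empty[T]) += op)
        timeouts.add(op)
      }

      // Called when new data arrives for a key; completes any operation that is now satisfied.
      def checkAndComplete(key: Any): Int = synchronized {
        val ops = watchers.getOrElse(key, mutable.ListBuffer.empty[T])
        val ready = ops.filter(op => op.isSatisfied && op.completed.compareAndSet(false, true))
        ops --= ready
        ready.foreach(_.onComplete())
        ready.size
      }

      // Complete overdue operations; ones already satisfied elsewhere are skipped.
      def expireOverdue(): Unit = {
        var op = timeouts.poll()   // poll() only returns elements whose delay has elapsed
        while (op != null) {
          if (op.completed.compareAndSet(false, true)) op.onComplete()
          op = timeouts.poll()
        }
      }
    }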
- */ - private class Watchers { - - - private val requests = new util.ArrayList[T] - - def numRequests = requests.size - - def add(t: T) { - synchronized { - requests.add(t) - } - } - - def purgeSatisfied(): Int = { - synchronized { - val iter = requests.iterator() - var purged = 0 - while(iter.hasNext) { - val curr = iter.next - if(curr.satisfied.get()) { - iter.remove() - purged += 1 - } - } - purged - } - } - - def collectSatisfiedRequests(request: R): Seq[T] = { - val response = new mutable.ArrayBuffer[T] - synchronized { - val iter = requests.iterator() - while(iter.hasNext) { - val curr = iter.next - if(curr.satisfied.get) { - // another thread has satisfied this request, remove it - iter.remove() - } else { - // synchronize on curr to avoid any race condition with expire - // on client-side. - val satisfied = curr synchronized checkSatisfied(request, curr) - if(satisfied) { - iter.remove() - val updated = curr.satisfied.compareAndSet(false, true) - if(updated == true) { - response += curr - expiredRequestReaper.satisfyRequest() - } - } - } - } - } - response - } - } - - /** - * Runnable to expire requests that have sat unfullfilled past their deadline - */ - private class ExpiredRequestReaper extends Runnable with Logging { - this.logIdent = "ExpiredRequestReaper-%d ".format(brokerId) - - private val delayed = new DelayQueue[T] - private val running = new AtomicBoolean(true) - private val shutdownLatch = new CountDownLatch(1) - - /* The count of elements in the delay queue that are unsatisfied */ - private [kafka] val unsatisfied = new AtomicInteger(0) - - def numRequests = delayed.size() - - /** Main loop for the expiry thread */ - def run() { - while(running.get) { - try { - val curr = pollExpired() - if (curr != null) { - curr synchronized { - expire(curr) - } - } - if (requestCounter.get >= purgeInterval) { // see if we need to force a full purge - requestCounter.set(0) - val purged = purgeSatisfied() - debug("Purged %d requests from delay queue.".format(purged)) - val numPurgedFromWatchers = watchersForKey.values.map(_.purgeSatisfied()).sum - debug("Purged %d (watcher) requests.".format(numPurgedFromWatchers)) - } - } catch { - case e: Exception => - error("Error in long poll expiry thread: ", e) - } - } - shutdownLatch.countDown() - } - - /** Add a request to be expired */ - def enqueue(t: T) { - delayed.add(t) - unsatisfied.incrementAndGet() - } - - /** Shutdown the expiry thread*/ - def shutdown() { - debug("Shutting down.") - running.set(false) - shutdownLatch.await() - debug("Shut down complete.") - } - - /** Record the fact that we satisfied a request in the stats for the expiry queue */ - def satisfyRequest(): Unit = unsatisfied.getAndDecrement() - - /** - * Get the next expired event - */ - private def pollExpired(): T = { - while(true) { - val curr = delayed.poll(200L, TimeUnit.MILLISECONDS) - if (curr == null) - return null.asInstanceOf[T] - val updated = curr.satisfied.compareAndSet(false, true) - if(updated) { - unsatisfied.getAndDecrement() - return curr - } - } - throw new RuntimeException("This should not happen") - } - - /** - * Delete all expired events from the delay queue - */ - private def purgeSatisfied(): Int = { - var purged = 0 - val iter = delayed.iterator() - while(iter.hasNext) { - val curr = iter.next() - if(curr.satisfied.get) { - iter.remove() - purged += 1 - } - } - purged - } - } - -} diff --git a/core/src/main/scala/kafka/server/TopicConfigManager.scala b/core/src/main/scala/kafka/server/TopicConfigManager.scala index 
d41fd33d91406..47295d4013149 100644 --- a/core/src/main/scala/kafka/server/TopicConfigManager.scala +++ b/core/src/main/scala/kafka/server/TopicConfigManager.scala @@ -40,6 +40,7 @@ import org.I0Itec.zkclient.{IZkChildListener, ZkClient} * To update a topic config we first update the topic config properties. Then we create a new sequential * znode under the change path which contains the name of the topic that was updated, say * /brokers/config_changes/config_change_13321 + * This is just a notification--the actual config change is stored only once under the /brokers/topics//config path. * * This will fire a watcher on all brokers. This watcher works as follows. It reads all the config change notifications. * It keeps track of the highest config change suffix number it has applied previously. For any previously applied change it finds @@ -59,7 +60,7 @@ import org.I0Itec.zkclient.{IZkChildListener, ZkClient} */ class TopicConfigManager(private val zkClient: ZkClient, private val logManager: LogManager, - private val changeExpirationMs: Long = 10*60*1000, + private val changeExpirationMs: Long = 15*60*1000, private val time: Time = SystemTime) extends Logging { private var lastExecutedChange = -1L @@ -86,7 +87,7 @@ class TopicConfigManager(private val zkClient: ZkClient, */ private def processConfigChanges(notifications: Seq[String]) { if (notifications.size > 0) { - info("Processing %d topic config change notification(s)...".format(notifications.size)) + info("Processing config change notification(s)...") val now = time.milliseconds val logs = logManager.logsByTopicPartition.toBuffer val logsByTopic = logs.groupBy(_._1.topic).mapValues(_.map(_._2)) @@ -94,26 +95,37 @@ class TopicConfigManager(private val zkClient: ZkClient, val changeId = changeNumber(notification) if (changeId > lastExecutedChange) { val changeZnode = ZkUtils.TopicConfigChangesPath + "/" + notification - val (topicJson, stat) = ZkUtils.readData(zkClient, changeZnode) - val topic = topicJson.substring(1, topicJson.length - 1) // dequote - if (logsByTopic.contains(topic)) { - /* combine the default properties with the overrides in zk to create the new LogConfig */ - val props = new Properties(logManager.defaultConfig.toProps) - props.putAll(AdminUtils.fetchTopicConfig(zkClient, topic)) - val logConfig = LogConfig.fromProps(props) - for (log <- logsByTopic(topic)) - log.config = logConfig - lastExecutedChange = changeId - info("Processed topic config change %d for topic %s, setting new config to %s.".format(changeId, topic, props)) - } else { - if (now - stat.getCtime > changeExpirationMs) { - /* this change is now obsolete, try to delete it unless it is the last change left */ - error("Ignoring topic config change %d for topic %s since the change has expired") - } else { - error("Ignoring topic config change %d for topic %s since the topic may have been deleted") + val (jsonOpt, stat) = ZkUtils.readDataMaybeNull(zkClient, changeZnode) + if(jsonOpt.isDefined) { + val json = jsonOpt.get + val topic = json.substring(1, json.length - 1) // hacky way to dequote + if (logsByTopic.contains(topic)) { + /* combine the default properties with the overrides in zk to create the new LogConfig */ + val props = new Properties(logManager.defaultConfig.toProps) + props.putAll(AdminUtils.fetchTopicConfig(zkClient, topic)) + val logConfig = LogConfig.fromProps(props) + for (log <- logsByTopic(topic)) + log.config = logConfig + info("Processed topic config change %d for topic %s, setting new config to %s.".format(changeId, topic, props)) + 
purgeObsoleteNotifications(now, notifications) } - ZkUtils.deletePath(zkClient, changeZnode) } + lastExecutedChange = changeId + } + } + } + } + + private def purgeObsoleteNotifications(now: Long, notifications: Seq[String]) { + for(notification <- notifications.sorted) { + val (jsonOpt, stat) = ZkUtils.readDataMaybeNull(zkClient, ZkUtils.TopicConfigChangesPath + "/" + notification) + if(jsonOpt.isDefined) { + val changeZnode = ZkUtils.TopicConfigChangesPath + "/" + notification + if (now - stat.getCtime > changeExpirationMs) { + debug("Purging config change notification " + notification) + ZkUtils.deletePath(zkClient, changeZnode) + } else { + return } } } diff --git a/core/src/main/scala/kafka/server/ZookeeperLeaderElector.scala b/core/src/main/scala/kafka/server/ZookeeperLeaderElector.scala index e5b6ff1e2544b..a75818a724ff3 100644 --- a/core/src/main/scala/kafka/server/ZookeeperLeaderElector.scala +++ b/core/src/main/scala/kafka/server/ZookeeperLeaderElector.scala @@ -50,9 +50,27 @@ class ZookeeperLeaderElector(controllerContext: ControllerContext, } } + private def getControllerID(): Int = { + readDataMaybeNull(controllerContext.zkClient, electionPath)._1 match { + case Some(controller) => KafkaController.parseControllerId(controller) + case None => -1 + } + } + def elect: Boolean = { val timestamp = SystemTime.milliseconds.toString val electString = Json.encode(Map("version" -> 1, "brokerid" -> brokerId, "timestamp" -> timestamp)) + + leaderId = getControllerID + /* + * We can get here during the initial startup and the handleDeleted ZK callback. Because of the potential race condition, + * it's possible that the controller has already been elected when we get here. This check will prevent the following + * createEphemeralPath method from getting into an infinite loop if this broker is already the controller. + */ + if(leaderId != -1) { + debug("Broker %d has been elected as leader, so stopping the election process.".format(leaderId)) + return amILeader + } try { createEphemeralPathExpectConflictHandleZKBug(controllerContext.zkClient, electionPath, electString, brokerId, @@ -64,15 +82,13 @@ class ZookeeperLeaderElector(controllerContext: ControllerContext, } catch { case e: ZkNodeExistsException => // If someone else has written the path, then - leaderId = readDataMaybeNull(controllerContext.zkClient, electionPath)._1 match { - case Some(controller) => KafkaController.parseControllerId(controller) - case None => { - warn("A leader has been elected but just resigned, this will result in another round of election") - -1 - } - } + leaderId = getControllerID + if (leaderId != -1) debug("Broker %d was elected as leader instead of broker %d".format(leaderId, brokerId)) + else + warn("A leader has been elected but just resigned, this will result in another round of election") + case e2: Throwable => error("Error while electing or becoming leader on broker %d".format(brokerId), e2) resign() diff --git a/core/src/main/scala/kafka/consumer/ConsoleConsumer.scala b/core/src/main/scala/kafka/tools/ConsoleConsumer.scala similarity index 65% rename from core/src/main/scala/kafka/consumer/ConsoleConsumer.scala rename to core/src/main/scala/kafka/tools/ConsoleConsumer.scala index dc066c23e9c22..323fc8566d974 100644 --- a/core/src/main/scala/kafka/consumer/ConsoleConsumer.scala +++ b/core/src/main/scala/kafka/tools/ConsoleConsumer.scala @@ -15,7 +15,7 @@ * limitations under the License. 
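The TopicConfigManager hunk above reworks how change notifications are consumed: each sequential znode under /brokers/config_changes only names a topic whose config should be re-read, a broker applies nothing at or below the change id it last executed, a notification whose znode has since disappeared is skipped, and old notifications are purged once they exceed changeExpirationMs rather than being deleted immediately. A pure-Scala sketch of that bookkeeping, with the ZooKeeper reads and deletes hidden behind a hypothetical ChangeStore trait (all names here are made up for illustration):

    trait ChangeStore {
      def readTopic(notification: String): Option[String] // topic named by the notification, if the znode still exists
      def createTimeMs(notification: String): Long
      def applyConfig(topic: String): Unit                 // re-read the topic's overrides and apply them
      def delete(notification: String): Unit
    }

    class ConfigChangeFollower(store: ChangeStore, expirationMs: Long) {
      private var lastExecutedChange = -1L

      // Notifications are named like "config_change_13321"; the numeric suffix orders them.
      private def changeNumber(notification: String): Long =
        notification.substring(notification.lastIndexOf('_') + 1).toLong

      def process(notifications: Seq[String], nowMs: Long): Unit = {
        for (n <- notifications.sorted; id = changeNumber(n); if id > lastExecutedChange) {
          store.readTopic(n).foreach(store.applyConfig)    // a vanished znode is simply skipped
          lastExecutedChange = id
        }
        // Purge from the oldest notification onward, stopping at the first one that is still fresh.
        notifications.sorted
          .takeWhile(n => nowMs - store.createTimeMs(n) > expirationMs)
          .foreach(store.delete)
      }
    }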
*/ -package kafka.consumer +package kafka.tools import scala.collection.JavaConversions._ import org.I0Itec.zkclient._ @@ -27,7 +27,7 @@ import kafka.message._ import kafka.serializer._ import kafka.utils._ import kafka.metrics.KafkaMetricsReporter - +import kafka.consumer.{Blacklist,Whitelist,ConsumerConfig,Consumer} /** * Consumer that dumps messages out to standard out. @@ -54,47 +54,11 @@ object ConsoleConsumer extends Logging { .withRequiredArg .describedAs("urls") .ofType(classOf[String]) - val groupIdOpt = parser.accepts("group", "The group id to consume on.") + + val consumerConfigOpt = parser.accepts("consumer.config", "Consumer config properties file.") .withRequiredArg - .describedAs("gid") - .defaultsTo("console-consumer-" + new Random().nextInt(100000)) + .describedAs("config file") .ofType(classOf[String]) - val fetchSizeOpt = parser.accepts("fetch-size", "The amount of data to fetch in a single request.") - .withRequiredArg - .describedAs("size") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(1024 * 1024) - val minFetchBytesOpt = parser.accepts("min-fetch-bytes", "The min number of bytes each fetch request waits for.") - .withRequiredArg - .describedAs("bytes") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(1) - val maxWaitMsOpt = parser.accepts("max-wait-ms", "The max amount of time each fetch request waits.") - .withRequiredArg - .describedAs("ms") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(100) - val socketBufferSizeOpt = parser.accepts("socket-buffer-size", "The size of the tcp RECV size.") - .withRequiredArg - .describedAs("size") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(2 * 1024 * 1024) - val socketTimeoutMsOpt = parser.accepts("socket-timeout-ms", "The socket timeout used for the connection to the broker") - .withRequiredArg - .describedAs("ms") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(ConsumerConfig.SocketTimeout) - val refreshMetadataBackoffMsOpt = parser.accepts("refresh-leader-backoff-ms", "Backoff time before refreshing metadata") - .withRequiredArg - .describedAs("ms") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(ConsumerConfig.RefreshMetadataBackoffMs) - val consumerTimeoutMsOpt = parser.accepts("consumer-timeout-ms", "consumer throws timeout exception after waiting this much " + - "of time without incoming messages") - .withRequiredArg - .describedAs("prop") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(-1) val messageFormatterOpt = parser.accepts("formatter", "The name of a class to use for formatting kafka messages for display.") .withRequiredArg .describedAs("class") @@ -104,13 +68,9 @@ object ConsoleConsumer extends Logging { .withRequiredArg .describedAs("prop") .ofType(classOf[String]) + val deleteConsumerOffsetsOpt = parser.accepts("delete-consumer-offsets", "If specified, the consumer path in zookeeper is deleted when starting up"); val resetBeginningOpt = parser.accepts("from-beginning", "If the consumer does not already have an established offset to consume from, " + "start with the earliest message present in the log rather than the latest message.") - val autoCommitIntervalOpt = parser.accepts("autocommit.interval.ms", "The time interval at which to save the current offset in ms") - .withRequiredArg - .describedAs("ms") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(ConsumerConfig.AutoCommitInterval) val maxMessagesOpt = parser.accepts("max-messages", "The maximum number of messages to consume before exiting. 
If not set, consumption is continual.") .withRequiredArg .describedAs("num_messages") @@ -124,15 +84,15 @@ object ConsoleConsumer extends Logging { .describedAs("metrics dictory") .ofType(classOf[java.lang.String]) - + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "The console consumer is a tool that reads data from Kafka and outputs it to standard output.") + + var groupIdPassed = true val options: OptionSet = tryParse(parser, args) CommandLineUtils.checkRequiredArgs(parser, options, zkConnectOpt) val topicOrFilterOpt = List(topicIdOpt, whitelistOpt, blacklistOpt).filter(options.has) - if (topicOrFilterOpt.size != 1) { - error("Exactly one of whitelist/blacklist/topic is required.") - parser.printHelpOn(System.err) - System.exit(1) - } + if (topicOrFilterOpt.size != 1) + CommandLineUtils.printUsageAndDie(parser, "Exactly one of whitelist/blacklist/topic is required.") val topicArg = options.valueOf(topicOrFilterOpt.head) val filterSpec = if (options.has(blacklistOpt)) new Blacklist(topicArg) @@ -153,39 +113,47 @@ object ConsoleConsumer extends Logging { KafkaMetricsReporter.startReporters(verifiableProps) } - val props = new Properties() - props.put("group.id", options.valueOf(groupIdOpt)) - props.put("socket.receive.buffer.bytes", options.valueOf(socketBufferSizeOpt).toString) - props.put("socket.timeout.ms", options.valueOf(socketTimeoutMsOpt).toString) - props.put("fetch.message.max.bytes", options.valueOf(fetchSizeOpt).toString) - props.put("fetch.min.bytes", options.valueOf(minFetchBytesOpt).toString) - props.put("fetch.wait.max.ms", options.valueOf(maxWaitMsOpt).toString) - props.put("auto.commit.enable", "true") - props.put("auto.commit.interval.ms", options.valueOf(autoCommitIntervalOpt).toString) - props.put("auto.offset.reset", if(options.has(resetBeginningOpt)) "smallest" else "largest") - props.put("zookeeper.connect", options.valueOf(zkConnectOpt)) - props.put("consumer.timeout.ms", options.valueOf(consumerTimeoutMsOpt).toString) - props.put("refresh.leader.backoff.ms", options.valueOf(refreshMetadataBackoffMsOpt).toString) - val config = new ConsumerConfig(props) - val skipMessageOnError = if (options.has(skipMessageOnErrorOpt)) true else false - val messageFormatterClass = Class.forName(options.valueOf(messageFormatterOpt)) - val formatterArgs = MessageFormatter.tryParseFormatterArgs(options.valuesOf(messageFormatterArgOpt)) + val consumerProps = if (options.has(consumerConfigOpt)) + Utils.loadProps(options.valueOf(consumerConfigOpt)) + else + new Properties() - val maxMessages = if(options.has(maxMessagesOpt)) options.valueOf(maxMessagesOpt).intValue else -1 + if(!consumerProps.containsKey("group.id")) { + consumerProps.put("group.id","console-consumer-" + new Random().nextInt(100000)) + groupIdPassed=false + } + consumerProps.put("auto.offset.reset", if(options.has(resetBeginningOpt)) "smallest" else "largest") + consumerProps.put("zookeeper.connect", options.valueOf(zkConnectOpt)) + if(!consumerProps.containsKey("dual.commit.enabled")) + consumerProps.put("dual.commit.enabled","false") + if(!consumerProps.containsKey("offsets.storage")) + consumerProps.put("offsets.storage","zookeeper") - val connector = Consumer.create(config) + if (!options.has(deleteConsumerOffsetsOpt) && options.has(resetBeginningOpt) && + checkZkPathExists(options.valueOf(zkConnectOpt),"/consumers/" + consumerProps.getProperty("group.id")+ "/offsets")) { + System.err.println("Found previous offset information for this group "+consumerProps.getProperty("group.id") + +". 
Please use --delete-consumer-offsets to delete previous offsets metadata") + System.exit(1) + } - if(options.has(resetBeginningOpt)) - ZkUtils.maybeDeletePath(options.valueOf(zkConnectOpt), "/consumers/" + options.valueOf(groupIdOpt)) + if(options.has(deleteConsumerOffsetsOpt)) + ZkUtils.maybeDeletePath(options.valueOf(zkConnectOpt), "/consumers/" + consumerProps.getProperty("group.id")) + + val config = new ConsumerConfig(consumerProps) + val skipMessageOnError = if (options.has(skipMessageOnErrorOpt)) true else false + val messageFormatterClass = Class.forName(options.valueOf(messageFormatterOpt)) + val formatterArgs = CommandLineUtils.parseKeyValueArgs(options.valuesOf(messageFormatterArgOpt)) + val maxMessages = if(options.has(maxMessagesOpt)) options.valueOf(maxMessagesOpt).intValue else -1 + val connector = Consumer.create(config) Runtime.getRuntime.addShutdownHook(new Thread() { override def run() { connector.shutdown() // if there is no group specified then avoid polluting zookeeper with persistent group data, this is a hack - if(!options.has(groupIdOpt)) - ZkUtils.maybeDeletePath(options.valueOf(zkConnectOpt), "/consumers/" + options.valueOf(groupIdOpt)) + if(!groupIdPassed) + ZkUtils.maybeDeletePath(options.valueOf(zkConnectOpt), "/consumers/" + consumerProps.get("group.id")) } }) @@ -239,31 +207,13 @@ object ConsoleConsumer extends Logging { } } - def tryCleanupZookeeper(zkUrl: String, groupId: String) { + def checkZkPathExists(zkUrl: String, path: String): Boolean = { try { - val dir = "/consumers/" + groupId - info("Cleaning up temporary zookeeper data under " + dir + ".") - val zk = new ZkClient(zkUrl, 30*1000, 30*1000, ZKStringSerializer) - zk.deleteRecursive(dir) - zk.close() + val zk = new ZkClient(zkUrl, 30*1000,30*1000, ZKStringSerializer); + zk.exists(path) } catch { - case _: Throwable => // swallow - } - } -} - - -object MessageFormatter { - def tryParseFormatterArgs(args: Iterable[String]): Properties = { - val splits = args.map(_ split "=").filterNot(_ == null).filterNot(_.length == 0) - if(!splits.forall(_.length == 2)) { - System.err.println("Invalid parser arguments: " + args.mkString(" ")) - System.exit(1) + case _: Throwable => false } - val props = new Properties - for(a <- splits) - props.put(a(0), a(1)) - props } } @@ -277,7 +227,7 @@ class DefaultMessageFormatter extends MessageFormatter { var printKey = false var keySeparator = "\t".getBytes var lineSeparator = "\n".getBytes - + override def init(props: Properties) { if(props.containsKey("print.key")) printKey = props.getProperty("print.key").trim.toLowerCase.equals("true") @@ -286,7 +236,7 @@ class DefaultMessageFormatter extends MessageFormatter { if(props.containsKey("line.separator")) lineSeparator = props.getProperty("line.separator").getBytes } - + def writeTo(key: Array[Byte], value: Array[Byte], output: PrintStream) { if(printKey) { output.write(if (key == null) "null".getBytes() else key) diff --git a/core/src/main/scala/kafka/tools/ConsoleProducer.scala b/core/src/main/scala/kafka/tools/ConsoleProducer.scala new file mode 100644 index 0000000000000..ac85949447028 --- /dev/null +++ b/core/src/main/scala/kafka/tools/ConsoleProducer.scala @@ -0,0 +1,297 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
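The rewritten ConsoleConsumer above replaces its long list of tuning flags with a single --consumer.config properties file and generates a throwaway group id only when the file does not provide one, recording that fact (groupIdPassed) so the shutdown hook knows it may delete the auto-generated group's ZooKeeper path. A small sketch of that defaulting step; the helper name is made up, and only the group.id key and the console-consumer-NNNNN pattern come from the patch:

    import java.io.FileInputStream
    import java.util.Properties
    import scala.util.Random

    object ConsumerPropsSketch {
      // Returns the effective properties plus whether the user supplied group.id themselves.
      def load(configFile: Option[String]): (Properties, Boolean) = {
        val props = new Properties()
        configFile.foreach { f =>
          val in = new FileInputStream(f)
          try props.load(in) finally in.close()
        }
        val groupIdPassed = props.containsKey("group.id")
        if (!groupIdPassed)
          props.put("group.id", "console-consumer-" + new Random().nextInt(100000))
        (props, groupIdPassed)
      }
    }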
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.tools + +import kafka.common._ +import kafka.message._ +import kafka.serializer._ +import kafka.utils.{ToolsUtils, CommandLineUtils} +import kafka.producer.{NewShinyProducer,OldProducer,KeyedMessage} + +import java.util.Properties +import java.io._ + +import joptsimple._ + +object ConsoleProducer { + + def main(args: Array[String]) { + + val config = new ProducerConfig(args) + val reader = Class.forName(config.readerClass).newInstance().asInstanceOf[MessageReader] + val props = new Properties + props.putAll(config.cmdLineProps) + props.put("topic", config.topic) + reader.init(System.in, props) + + try { + val producer = + if(config.useNewProducer) { + import org.apache.kafka.clients.producer.ProducerConfig + + props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.brokerList) + props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, config.compressionCodec) + props.put(ProducerConfig.SEND_BUFFER_CONFIG, config.socketBuffer.toString) + props.put(ProducerConfig.RETRY_BACKOFF_MS_CONFIG, config.retryBackoffMs.toString) + props.put(ProducerConfig.METADATA_MAX_AGE_CONFIG, config.metadataExpiryMs.toString) + props.put(ProducerConfig.METADATA_FETCH_TIMEOUT_CONFIG, config.metadataFetchTimeoutMs.toString) + props.put(ProducerConfig.ACKS_CONFIG, config.requestRequiredAcks.toString) + props.put(ProducerConfig.TIMEOUT_CONFIG, config.requestTimeoutMs.toString) + props.put(ProducerConfig.RETRIES_CONFIG, config.messageSendMaxRetries.toString) + props.put(ProducerConfig.LINGER_MS_CONFIG, config.sendTimeout.toString) + if(config.queueEnqueueTimeoutMs != -1) + props.put(ProducerConfig.BLOCK_ON_BUFFER_FULL_CONFIG, "false") + props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, config.maxMemoryBytes.toString) + props.put(ProducerConfig.BATCH_SIZE_CONFIG, config.maxPartitionMemoryBytes.toString) + props.put(ProducerConfig.CLIENT_ID_CONFIG, "console-producer") + + new NewShinyProducer(props) + } else { + props.put("metadata.broker.list", config.brokerList) + props.put("compression.codec", config.compressionCodec) + props.put("producer.type", if(config.sync) "sync" else "async") + props.put("batch.num.messages", config.batchSize.toString) + props.put("message.send.max.retries", config.messageSendMaxRetries.toString) + props.put("retry.backoff.ms", config.retryBackoffMs.toString) + props.put("queue.buffering.max.ms", config.sendTimeout.toString) + props.put("queue.buffering.max.messages", config.queueSize.toString) + props.put("queue.enqueue.timeout.ms", config.queueEnqueueTimeoutMs.toString) + props.put("request.required.acks", config.requestRequiredAcks.toString) + props.put("request.timeout.ms", config.requestTimeoutMs.toString) + props.put("key.serializer.class", config.keyEncoderClass) + props.put("serializer.class", config.valueEncoderClass) + props.put("send.buffer.bytes", config.socketBuffer.toString) + props.put("topic.metadata.refresh.interval.ms", config.metadataExpiryMs.toString) + props.put("client.id", 
"console-producer") + + new OldProducer(props) + } + + Runtime.getRuntime.addShutdownHook(new Thread() { + override def run() { + producer.close() + } + }) + + var message: KeyedMessage[Array[Byte], Array[Byte]] = null + do { + message = reader.readMessage() + if(message != null) + producer.send(message.topic, message.key, message.message) + } while(message != null) + } catch { + case e: Exception => + e.printStackTrace + System.exit(1) + } + System.exit(0) + } + + class ProducerConfig(args: Array[String]) { + val parser = new OptionParser + val topicOpt = parser.accepts("topic", "REQUIRED: The topic id to produce messages to.") + .withRequiredArg + .describedAs("topic") + .ofType(classOf[String]) + val brokerListOpt = parser.accepts("broker-list", "REQUIRED: The broker list string in the form HOST1:PORT1,HOST2:PORT2.") + .withRequiredArg + .describedAs("broker-list") + .ofType(classOf[String]) + val syncOpt = parser.accepts("sync", "If set message send requests to the brokers are synchronously, one at a time as they arrive.") + val compressionCodecOpt = parser.accepts("compression-codec", "The compression codec: either 'none', 'gzip', 'snappy', or 'lz4'." + + "If specified without value, then it defaults to 'gzip'") + .withOptionalArg() + .describedAs("compression-codec") + .ofType(classOf[String]) + val batchSizeOpt = parser.accepts("batch-size", "Number of messages to send in a single batch if they are not being sent synchronously.") + .withRequiredArg + .describedAs("size") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(200) + val messageSendMaxRetriesOpt = parser.accepts("message-send-max-retries", "Brokers can fail receiving the message for multiple reasons, and being unavailable transiently is just one of them. This property specifies the number of retires before the producer give up and drop this message.") + .withRequiredArg + .ofType(classOf[java.lang.Integer]) + .defaultsTo(3) + val retryBackoffMsOpt = parser.accepts("retry-backoff-ms", "Before each retry, the producer refreshes the metadata of relevant topics. Since leader election takes a bit of time, this property specifies the amount of time that the producer waits before refreshing the metadata.") + .withRequiredArg + .ofType(classOf[java.lang.Integer]) + .defaultsTo(100) + val sendTimeoutOpt = parser.accepts("timeout", "If set and the producer is running in asynchronous mode, this gives the maximum amount of time" + + " a message will queue awaiting sufficient batch size. The value is given in ms.") + .withRequiredArg + .describedAs("timeout_ms") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(1000) + val queueSizeOpt = parser.accepts("queue-size", "If set and the producer is running in asynchronous mode, this gives the maximum amount of " + + " messages will queue awaiting sufficient batch size.") + .withRequiredArg + .describedAs("queue_size") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(10000) + val queueEnqueueTimeoutMsOpt = parser.accepts("queue-enqueuetimeout-ms", "Timeout for event enqueue") + .withRequiredArg + .describedAs("queue enqueuetimeout ms") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(Int.MaxValue) + val requestRequiredAcksOpt = parser.accepts("request-required-acks", "The required acks of the producer requests") + .withRequiredArg + .describedAs("request required acks") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(0) + val requestTimeoutMsOpt = parser.accepts("request-timeout-ms", "The ack timeout of the producer requests. 
Value must be non-negative and non-zero") + .withRequiredArg + .describedAs("request timeout ms") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(1500) + val metadataExpiryMsOpt = parser.accepts("metadata-expiry-ms", + "The period of time in milliseconds after which we force a refresh of metadata even if we haven't seen any leadership changes.") + .withRequiredArg + .describedAs("metadata expiration interval") + .ofType(classOf[java.lang.Long]) + .defaultsTo(5*60*1000L) + val metadataFetchTimeoutMsOpt = parser.accepts("metadata-fetch-timeout-ms", + "The amount of time to block waiting to fetch metadata about a topic the first time a record is sent to that topic.") + .withRequiredArg + .describedAs("metadata fetch timeout") + .ofType(classOf[java.lang.Long]) + .defaultsTo(60*1000L) + val maxMemoryBytesOpt = parser.accepts("max-memory-bytes", + "The total memory used by the producer to buffer records waiting to be sent to the server.") + .withRequiredArg + .describedAs("total memory in bytes") + .ofType(classOf[java.lang.Long]) + .defaultsTo(32 * 1024 * 1024L) + val maxPartitionMemoryBytesOpt = parser.accepts("max-partition-memory-bytes", + "The buffer size allocated for a partition. When records are received which are smaller than this size the producer " + + "will attempt to optimistically group them together until this size is reached.") + .withRequiredArg + .describedAs("memory in bytes per partition") + .ofType(classOf[java.lang.Long]) + .defaultsTo(16 * 1024L) + val valueEncoderOpt = parser.accepts("value-serializer", "The class name of the message encoder implementation to use for serializing values.") + .withRequiredArg + .describedAs("encoder_class") + .ofType(classOf[java.lang.String]) + .defaultsTo(classOf[DefaultEncoder].getName) + val keyEncoderOpt = parser.accepts("key-serializer", "The class name of the message encoder implementation to use for serializing keys.") + .withRequiredArg + .describedAs("encoder_class") + .ofType(classOf[java.lang.String]) + .defaultsTo(classOf[DefaultEncoder].getName) + val messageReaderOpt = parser.accepts("line-reader", "The class name of the class to use for reading lines from standard in. " + + "By default each line is read as a separate message.") + .withRequiredArg + .describedAs("reader_class") + .ofType(classOf[java.lang.String]) + .defaultsTo(classOf[LineMessageReader].getName) + val socketBufferSizeOpt = parser.accepts("socket-buffer-size", "The size of the tcp RECV size.") + .withRequiredArg + .describedAs("size") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(1024*100) + val propertyOpt = parser.accepts("property", "A mechanism to pass user-defined properties in the form key=value to the message reader. 
" + + "This allows custom configuration for a user-defined message reader.") + .withRequiredArg + .describedAs("prop") + .ofType(classOf[String]) + val useNewProducerOpt = parser.accepts("new-producer", "Use the new producer implementation.") + + val options = parser.parse(args : _*) + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "Read data from standard input and publish it to Kafka.") + CommandLineUtils.checkRequiredArgs(parser, options, topicOpt, brokerListOpt) + + import scala.collection.JavaConversions._ + val useNewProducer = options.has(useNewProducerOpt) + val topic = options.valueOf(topicOpt) + val brokerList = options.valueOf(brokerListOpt) + ToolsUtils.validatePortOrDie(parser,brokerList) + val sync = options.has(syncOpt) + val compressionCodecOptionValue = options.valueOf(compressionCodecOpt) + val compressionCodec = if (options.has(compressionCodecOpt)) + if (compressionCodecOptionValue == null || compressionCodecOptionValue.isEmpty) + DefaultCompressionCodec.name + else compressionCodecOptionValue + else NoCompressionCodec.name + val batchSize = options.valueOf(batchSizeOpt) + val sendTimeout = options.valueOf(sendTimeoutOpt) + val queueSize = options.valueOf(queueSizeOpt) + val queueEnqueueTimeoutMs = options.valueOf(queueEnqueueTimeoutMsOpt) + val requestRequiredAcks = options.valueOf(requestRequiredAcksOpt) + val requestTimeoutMs = options.valueOf(requestTimeoutMsOpt) + val messageSendMaxRetries = options.valueOf(messageSendMaxRetriesOpt) + val retryBackoffMs = options.valueOf(retryBackoffMsOpt) + val keyEncoderClass = options.valueOf(keyEncoderOpt) + val valueEncoderClass = options.valueOf(valueEncoderOpt) + val readerClass = options.valueOf(messageReaderOpt) + val socketBuffer = options.valueOf(socketBufferSizeOpt) + val cmdLineProps = CommandLineUtils.parseKeyValueArgs(options.valuesOf(propertyOpt)) + /* new producer related configs */ + val maxMemoryBytes = options.valueOf(maxMemoryBytesOpt) + val maxPartitionMemoryBytes = options.valueOf(maxPartitionMemoryBytesOpt) + val metadataExpiryMs = options.valueOf(metadataExpiryMsOpt) + val metadataFetchTimeoutMs = options.valueOf(metadataFetchTimeoutMsOpt) + } + + trait MessageReader { + def init(inputStream: InputStream, props: Properties) {} + def readMessage(): KeyedMessage[Array[Byte], Array[Byte]] + def close() {} + } + + class LineMessageReader extends MessageReader { + var topic: String = null + var reader: BufferedReader = null + var parseKey = false + var keySeparator = "\t" + var ignoreError = false + var lineNumber = 0 + + override def init(inputStream: InputStream, props: Properties) { + topic = props.getProperty("topic") + if(props.containsKey("parse.key")) + parseKey = props.getProperty("parse.key").trim.toLowerCase.equals("true") + if(props.containsKey("key.separator")) + keySeparator = props.getProperty("key.separator") + if(props.containsKey("ignore.error")) + ignoreError = props.getProperty("ignore.error").trim.toLowerCase.equals("true") + reader = new BufferedReader(new InputStreamReader(inputStream)) + } + + override def readMessage() = { + lineNumber += 1 + (reader.readLine(), parseKey) match { + case (null, _) => null + case (line, true) => + line.indexOf(keySeparator) match { + case -1 => + if(ignoreError) + new KeyedMessage[Array[Byte], Array[Byte]](topic, line.getBytes()) + else + throw new KafkaException("No key found on line " + lineNumber + ": " + line) + case n => + new KeyedMessage[Array[Byte], Array[Byte]](topic, + line.substring(0, n).getBytes, + (if(n + keySeparator.size > 
line.size) "" else line.substring(n + keySeparator.size)).getBytes()) + } + case (line, false) => + new KeyedMessage[Array[Byte], Array[Byte]](topic, line.getBytes()) + } + } + } +} diff --git a/core/src/main/scala/kafka/tools/ConsumerOffsetChecker.scala b/core/src/main/scala/kafka/tools/ConsumerOffsetChecker.scala index 33d7c2c4aacc9..d1e7c434e7785 100644 --- a/core/src/main/scala/kafka/tools/ConsumerOffsetChecker.scala +++ b/core/src/main/scala/kafka/tools/ConsumerOffsetChecker.scala @@ -20,16 +20,22 @@ package kafka.tools import joptsimple._ import org.I0Itec.zkclient.ZkClient -import kafka.utils.{Json, ZkUtils, ZKStringSerializer, Logging} +import kafka.utils._ import kafka.consumer.SimpleConsumer -import kafka.api.{PartitionOffsetRequestInfo, OffsetRequest} -import kafka.common.{BrokerNotAvailableException, TopicAndPartition} +import kafka.api.{OffsetFetchResponse, OffsetFetchRequest, OffsetRequest} +import kafka.common.{OffsetMetadataAndError, ErrorMapping, BrokerNotAvailableException, TopicAndPartition} import scala.collection._ - +import kafka.client.ClientUtils +import kafka.network.BlockingChannel +import kafka.api.PartitionOffsetRequestInfo +import scala.Some +import org.I0Itec.zkclient.exception.ZkNoNodeException object ConsumerOffsetChecker extends Logging { private val consumerMap: mutable.Map[Int, Option[SimpleConsumer]] = mutable.Map() + private val offsetMap: mutable.Map[TopicAndPartition, Long] = mutable.Map() + private var topicPidMap: immutable.Map[String, Seq[Int]] = immutable.Map() private def getConsumer(zkClient: ZkClient, bid: Int): Option[SimpleConsumer] = { try { @@ -49,18 +55,17 @@ object ConsumerOffsetChecker extends Logging { } } catch { case t: Throwable => - error("Could not parse broker info", t) + println("Could not parse broker info due to " + t.getCause) None } } private def processPartition(zkClient: ZkClient, group: String, topic: String, pid: Int) { - val offset = ZkUtils.readData(zkClient, "/consumers/%s/offsets/%s/%s". - format(group, topic, pid))._1.toLong - val owner = ZkUtils.readDataMaybeNull(zkClient, "/consumers/%s/owners/%s/%s". 
- format(group, topic, pid))._1 - + val topicPartition = TopicAndPartition(topic, pid) + val offsetOpt = offsetMap.get(topicPartition) + val groupDirs = new ZKGroupTopicDirs(group, topic) + val owner = ZkUtils.readDataMaybeNull(zkClient, groupDirs.consumerOwnerDir + "/%s".format(pid))._1 ZkUtils.getLeaderForPartition(zkClient, topic, pid) match { case Some(bid) => val consumerOpt = consumerMap.getOrElseUpdate(bid, getConsumer(zkClient, bid)) @@ -71,19 +76,18 @@ object ConsumerOffsetChecker extends Logging { OffsetRequest(immutable.Map(topicAndPartition -> PartitionOffsetRequestInfo(OffsetRequest.LatestTime, 1))) val logSize = consumer.getOffsetsBefore(request).partitionErrorAndOffsets(topicAndPartition).offsets.head - val lag = logSize - offset - println("%-15s %-30s %-3s %-15s %-15s %-15s %s".format(group, topic, pid, offset, logSize, lag, - owner match {case Some(ownerStr) => ownerStr case None => "none"})) + val lagString = offsetOpt.map(o => if (o == -1) "unknown" else (logSize - o).toString) + println("%-15s %-30s %-3s %-15s %-15s %-15s %s".format(group, topic, pid, offsetOpt.getOrElse("unknown"), logSize, lagString.getOrElse("unknown"), + owner match {case Some(ownerStr) => ownerStr case None => "none"})) case None => // ignore } case None => - error("No broker for partition %s - %s".format(topic, pid)) + println("No broker for partition %s - %s".format(topic, pid)) } } private def processTopic(zkClient: ZkClient, group: String, topic: String) { - val pidMap = ZkUtils.getPartitionsForTopics(zkClient, Seq(topic)) - pidMap.get(topic) match { + topicPidMap.get(topic) match { case Some(pids) => pids.sorted.foreach { pid => processPartition(zkClient, group, topic, pid) @@ -105,15 +109,23 @@ object ConsumerOffsetChecker extends Logging { def main(args: Array[String]) { val parser = new OptionParser() - val zkConnectOpt = parser.accepts("zkconnect", "ZooKeeper connect string."). - withRequiredArg().defaultsTo("localhost:2181").ofType(classOf[String]); + val zkConnectOpt = parser.accepts("zookeeper", "ZooKeeper connect string."). + withRequiredArg().defaultsTo("localhost:2181").ofType(classOf[String]) val topicsOpt = parser.accepts("topic", "Comma-separated list of consumer topics (all topics if absent)."). withRequiredArg().ofType(classOf[String]) val groupOpt = parser.accepts("group", "Consumer group."). withRequiredArg().ofType(classOf[String]) + val channelSocketTimeoutMsOpt = parser.accepts("socket.timeout.ms", "Socket timeout to use when querying for offsets."). + withRequiredArg().ofType(classOf[java.lang.Integer]).defaultsTo(6000) + val channelRetryBackoffMsOpt = parser.accepts("retry.backoff.ms", "Retry back-off to use for failed offset queries."). 
+ withRequiredArg().ofType(classOf[java.lang.Integer]).defaultsTo(3000) + parser.accepts("broker-info", "Print broker info") parser.accepts("help", "Print this message.") + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "Check the offset of your consumers.") val options = parser.parse(args : _*) @@ -122,31 +134,60 @@ object ConsumerOffsetChecker extends Logging { System.exit(0) } - for (opt <- List(groupOpt)) - if (!options.has(opt)) { - System.err.println("Missing required argument: %s".format(opt)) - parser.printHelpOn(System.err) - System.exit(1) - } + CommandLineUtils.checkRequiredArgs(parser, options, groupOpt, zkConnectOpt) val zkConnect = options.valueOf(zkConnectOpt) + val group = options.valueOf(groupOpt) - val topics = if (options.has(topicsOpt)) Some(options.valueOf(topicsOpt)) - else None + val groupDirs = new ZKGroupDirs(group) + + val channelSocketTimeoutMs = options.valueOf(channelSocketTimeoutMsOpt).intValue() + val channelRetryBackoffMs = options.valueOf(channelRetryBackoffMsOpt).intValue() + val topics = if (options.has(topicsOpt)) Some(options.valueOf(topicsOpt)) else None var zkClient: ZkClient = null + var channel: BlockingChannel = null try { zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer) val topicList = topics match { case Some(x) => x.split(",").view.toList - case None => ZkUtils.getChildren( - zkClient, "/consumers/%s/offsets".format(group)).toList + case None => ZkUtils.getChildren(zkClient, groupDirs.consumerGroupDir + "/owners").toList } - debug("zkConnect = %s; topics = %s; group = %s".format( - zkConnect, topicList.toString(), group)) + topicPidMap = immutable.Map(ZkUtils.getPartitionsForTopics(zkClient, topicList).toSeq:_*) + val topicPartitions = topicPidMap.flatMap { case(topic, partitionSeq) => partitionSeq.map(TopicAndPartition(topic, _)) }.toSeq + val channel = ClientUtils.channelToOffsetManager(group, zkClient, channelSocketTimeoutMs, channelRetryBackoffMs) + + debug("Sending offset fetch request to coordinator %s:%d.".format(channel.host, channel.port)) + channel.send(OffsetFetchRequest(group, topicPartitions)) + val offsetFetchResponse = OffsetFetchResponse.readFrom(channel.receive().buffer) + debug("Received offset fetch response %s.".format(offsetFetchResponse)) + + offsetFetchResponse.requestInfo.foreach { case (topicAndPartition, offsetAndMetadata) => + if (offsetAndMetadata == OffsetMetadataAndError.NoOffset) { + val topicDirs = new ZKGroupTopicDirs(group, topicAndPartition.topic) + // this group may not have migrated off zookeeper for offsets storage (we don't expose the dual-commit option in this tool + // (meaning the lag may be off until all the consumers in the group have the same setting for offsets storage) + try { + val offset = ZkUtils.readData(zkClient, topicDirs.consumerOffsetDir + "/%d".format(topicAndPartition.partition))._1.toLong + offsetMap.put(topicAndPartition, offset) + } catch { + case z: ZkNoNodeException => + if(ZkUtils.pathExists(zkClient,topicDirs.consumerOffsetDir)) + offsetMap.put(topicAndPartition,-1) + else + throw z + } + } + else if (offsetAndMetadata.error == ErrorMapping.NoError) + offsetMap.put(topicAndPartition, offsetAndMetadata.offset) + else { + println("Could not fetch offset for %s due to %s.".format(topicAndPartition, ErrorMapping.exceptionFor(offsetAndMetadata.error))) + } + } + channel.disconnect() println("%-15s %-30s %-3s %-15s %-15s %-15s %s".format("Group", "Topic", "Pid", "Offset", "logSize", "Lag", "Owner")) topicList.sorted.foreach { @@ -154,7 +195,7 @@ 
object ConsumerOffsetChecker extends Logging { } if (options.has("broker-info")) - printBrokerInfo(); + printBrokerInfo() for ((_, consumerOpt) <- consumerMap) consumerOpt match { @@ -162,6 +203,10 @@ object ConsumerOffsetChecker extends Logging { case None => // ignore } } + catch { + case t: Throwable => + println("Exiting due to: %s.".format(t.getMessage)) + } finally { for (consumerOpt <- consumerMap.values) { consumerOpt match { @@ -171,7 +216,9 @@ object ConsumerOffsetChecker extends Logging { } if (zkClient != null) zkClient.close() + + if (channel != null) + channel.disconnect() } } } - diff --git a/perf/src/main/scala/kafka/perf/ConsumerPerformance.scala b/core/src/main/scala/kafka/tools/ConsumerPerformance.scala similarity index 68% rename from perf/src/main/scala/kafka/perf/ConsumerPerformance.scala rename to core/src/main/scala/kafka/tools/ConsumerPerformance.scala index 55ee01b40afa4..661a5e28e82b7 100644 --- a/perf/src/main/scala/kafka/perf/ConsumerPerformance.scala +++ b/core/src/main/scala/kafka/tools/ConsumerPerformance.scala @@ -15,15 +15,15 @@ * limitations under the License. */ -package kafka.perf +package kafka.tools import java.util.concurrent.CountDownLatch import java.util.concurrent.atomic.AtomicLong import java.nio.channels.ClosedByInterruptException import org.apache.log4j.Logger import kafka.message.Message -import kafka.utils.ZkUtils -import java.util.{Random, Properties} +import kafka.utils.{ZkUtils, CommandLineUtils} +import java.util.{ Random, Properties } import kafka.consumer._ import java.text.SimpleDateFormat @@ -40,8 +40,8 @@ object ConsumerPerformance { var totalMessagesRead = new AtomicLong(0) var totalBytesRead = new AtomicLong(0) - if(!config.hideHeader) { - if(!config.showDetailedStats) + if (!config.hideHeader) { + if (!config.showDetailedStats) println("start.time, end.time, fetch.size, data.consumed.in.MB, MB.sec, data.consumed.in.nMsg, nMsg.sec") else println("time, fetch.size, data.consumed.in.MB, MB.sec, data.consumed.in.nMsg, nMsg.sec") @@ -57,7 +57,7 @@ object ConsumerPerformance { for ((topic, streamList) <- topicMessageStreams) for (i <- 0 until streamList.length) threadList ::= new ConsumerPerfThread(i, "kafka-zk-consumer-" + i, streamList(i), config, - totalMessagesRead, totalBytesRead) + totalMessagesRead, totalBytesRead) logger.info("Sleeping for 1 second.") Thread.sleep(1000) @@ -67,72 +67,72 @@ object ConsumerPerformance { thread.start for (thread <- threadList) - thread.shutdown + thread.join val endMs = System.currentTimeMillis val elapsedSecs = (endMs - startMs - config.consumerConfig.consumerTimeoutMs) / 1000.0 - if(!config.showDetailedStats) { - val totalMBRead = (totalBytesRead.get*1.0)/(1024*1024) + + if(elapsedSecs <= 0) { + println("The total running time should be more than 'consumer.timeout.ms'!") + System.exit(1) + } + + if (!config.showDetailedStats) { + val totalMBRead = (totalBytesRead.get * 1.0) / (1024 * 1024) println(("%s, %s, %d, %.4f, %.4f, %d, %.4f").format(config.dateFormat.format(startMs), config.dateFormat.format(endMs), - config.consumerConfig.fetchMessageMaxBytes, totalMBRead, totalMBRead/elapsedSecs, totalMessagesRead.get, - totalMessagesRead.get/elapsedSecs)) + config.consumerConfig.fetchMessageMaxBytes, totalMBRead, totalMBRead / elapsedSecs, totalMessagesRead.get, + totalMessagesRead.get / elapsedSecs)) } System.exit(0) } class ConsumerPerfConfig(args: Array[String]) extends PerfConfig(args) { val zkConnectOpt = parser.accepts("zookeeper", "REQUIRED: The connection string for the zookeeper connection in 
the form host:port. " + - "Multiple URLS can be given to allow fail-over.") - .withRequiredArg - .describedAs("urls") - .ofType(classOf[String]) + "Multiple URLS can be given to allow fail-over.") + .withRequiredArg + .describedAs("urls") + .ofType(classOf[String]) val topicOpt = parser.accepts("topic", "REQUIRED: The topic to consume from.") .withRequiredArg .describedAs("topic") .ofType(classOf[String]) val groupIdOpt = parser.accepts("group", "The group id to consume on.") - .withRequiredArg - .describedAs("gid") - .defaultsTo("perf-consumer-" + new Random().nextInt(100000)) - .ofType(classOf[String]) + .withRequiredArg + .describedAs("gid") + .defaultsTo("perf-consumer-" + new Random().nextInt(100000)) + .ofType(classOf[String]) val fetchSizeOpt = parser.accepts("fetch-size", "The amount of data to fetch in a single request.") - .withRequiredArg - .describedAs("size") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(1024 * 1024) + .withRequiredArg + .describedAs("size") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(1024 * 1024) val resetBeginningOffsetOpt = parser.accepts("from-latest", "If the consumer does not already have an established " + "offset to consume from, start with the latest message present in the log rather than the earliest message.") val socketBufferSizeOpt = parser.accepts("socket-buffer-size", "The size of the tcp RECV size.") - .withRequiredArg - .describedAs("size") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(2 * 1024 * 1024) + .withRequiredArg + .describedAs("size") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(2 * 1024 * 1024) val numThreadsOpt = parser.accepts("threads", "Number of processing threads.") - .withRequiredArg - .describedAs("count") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(10) + .withRequiredArg + .describedAs("count") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(10) val numFetchersOpt = parser.accepts("num-fetch-threads", "Number of fetcher threads.") - .withRequiredArg - .describedAs("count") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(1) - - val options = parser.parse(args : _*) - - for(arg <- List(topicOpt, zkConnectOpt)) { - if(!options.has(arg)) { - System.err.println("Missing required argument \"" + arg + "\"") - parser.printHelpOn(System.err) - System.exit(1) - } - } + .withRequiredArg + .describedAs("count") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(1) + + val options = parser.parse(args: _*) + + CommandLineUtils.checkRequiredArgs(parser, options, topicOpt, zkConnectOpt) val props = new Properties props.put("group.id", options.valueOf(groupIdOpt)) props.put("socket.receive.buffer.bytes", options.valueOf(socketBufferSizeOpt).toString) props.put("fetch.message.max.bytes", options.valueOf(fetchSizeOpt).toString) - props.put("auto.offset.reset", if(options.has(resetBeginningOffsetOpt)) "largest" else "smallest") + props.put("auto.offset.reset", if (options.has(resetBeginningOffsetOpt)) "largest" else "smallest") props.put("zookeeper.connect", options.valueOf(zkConnectOpt)) props.put("consumer.timeout.ms", "5000") props.put("num.consumer.fetchers", options.valueOf(numFetchersOpt).toString) @@ -147,13 +147,8 @@ object ConsumerPerformance { } class ConsumerPerfThread(threadId: Int, name: String, stream: KafkaStream[Array[Byte], Array[Byte]], - config:ConsumerPerfConfig, totalMessagesRead: AtomicLong, totalBytesRead: AtomicLong) + config: ConsumerPerfConfig, totalMessagesRead: AtomicLong, totalBytesRead: AtomicLong) extends Thread(name) { - private val shutdownLatch = new CountDownLatch(1) 
- - def shutdown(): Unit = { - shutdownLatch.await - } override def run() { var bytesRead = 0L @@ -164,43 +159,41 @@ object ConsumerPerformance { var lastMessagesRead = 0L try { - for (messageAndMetadata <- stream if messagesRead < config.numMessages) { + val iter = stream.iterator + while (iter.hasNext && messagesRead < config.numMessages) { + val messageAndMetadata = iter.next messagesRead += 1 bytesRead += messageAndMetadata.message.length if (messagesRead % config.reportingInterval == 0) { - if(config.showDetailedStats) + if (config.showDetailedStats) printMessage(threadId, bytesRead, lastBytesRead, messagesRead, lastMessagesRead, lastReportTime, System.currentTimeMillis) lastReportTime = System.currentTimeMillis lastMessagesRead = messagesRead lastBytesRead = bytesRead } } - } - catch { + } catch { case _: InterruptedException => case _: ClosedByInterruptException => case _: ConsumerTimeoutException => - case e: Throwable => throw e + case e: Throwable => e.printStackTrace() } totalMessagesRead.addAndGet(messagesRead) totalBytesRead.addAndGet(bytesRead) - if(config.showDetailedStats) + if (config.showDetailedStats) printMessage(threadId, bytesRead, lastBytesRead, messagesRead, lastMessagesRead, startMs, System.currentTimeMillis) - shutdownComplete } private def printMessage(id: Int, bytesRead: Long, lastBytesRead: Long, messagesRead: Long, lastMessagesRead: Long, - startMs: Long, endMs: Long) = { + startMs: Long, endMs: Long) = { val elapsedMs = endMs - startMs - val totalMBRead = (bytesRead*1.0)/(1024*1024) - val mbRead = ((bytesRead - lastBytesRead)*1.0)/(1024*1024) + val totalMBRead = (bytesRead * 1.0) / (1024 * 1024) + val mbRead = ((bytesRead - lastBytesRead) * 1.0) / (1024 * 1024) println(("%s, %d, %d, %.4f, %.4f, %d, %.4f").format(config.dateFormat.format(endMs), id, config.consumerConfig.fetchMessageMaxBytes, totalMBRead, - 1000.0*(mbRead/elapsedMs), messagesRead, ((messagesRead - lastMessagesRead)/elapsedMs)*1000.0)) + 1000.0 * (mbRead / elapsedMs), messagesRead, ((messagesRead - lastMessagesRead) / elapsedMs) * 1000.0)) } - - private def shutdownComplete() = shutdownLatch.countDown } } diff --git a/core/src/main/scala/kafka/tools/DumpLogSegments.scala b/core/src/main/scala/kafka/tools/DumpLogSegments.scala index 14f44d9305f34..fe2cc11b75f37 100644 --- a/core/src/main/scala/kafka/tools/DumpLogSegments.scala +++ b/core/src/main/scala/kafka/tools/DumpLogSegments.scala @@ -23,7 +23,8 @@ import kafka.log._ import kafka.utils._ import collection.mutable import joptsimple.OptionParser - +import kafka.serializer.Decoder +import kafka.utils.VerifiableProperties object DumpLogSegments { @@ -41,19 +42,30 @@ object DumpLogSegments { .ofType(classOf[java.lang.Integer]) .defaultsTo(5 * 1024 * 1024) val deepIterationOpt = parser.accepts("deep-iteration", "if set, uses deep instead of shallow iteration") + val valueDecoderOpt = parser.accepts("value-decoder-class", "if set, used to deserialize the messages. This class should implement kafka.serializer.Decoder trait. Custom jar should be available in kafka/libs directory.") + .withOptionalArg() + .ofType(classOf[java.lang.String]) + .defaultsTo("kafka.serializer.StringDecoder") + val keyDecoderOpt = parser.accepts("key-decoder-class", "if set, used to deserialize the keys. This class should implement kafka.serializer.Decoder trait. 
Custom jar should be available in kafka/libs directory.") + .withOptionalArg() + .ofType(classOf[java.lang.String]) + .defaultsTo("kafka.serializer.StringDecoder") + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "Parse a log file and dump its contents to the console, useful for debugging a seemingly corrupt log segment.") val options = parser.parse(args : _*) - if(!options.has(filesOpt)) { - System.err.println("Missing required argument \"" + filesOpt + "\"") - parser.printHelpOn(System.err) - System.exit(1) - } + + CommandLineUtils.checkRequiredArgs(parser, options, filesOpt) val print = if(options.has(printOpt)) true else false val verifyOnly = if(options.has(verifyOpt)) true else false val files = options.valueOf(filesOpt).split(",") val maxMessageSize = options.valueOf(maxMessageSizeOpt).intValue() val isDeepIteration = if(options.has(deepIterationOpt)) true else false + + val valueDecoder: Decoder[_] = Utils.createObject[Decoder[_]](options.valueOf(valueDecoderOpt), new VerifiableProperties) + val keyDecoder: Decoder[_] = Utils.createObject[Decoder[_]](options.valueOf(keyDecoderOpt), new VerifiableProperties) val misMatchesForIndexFilesMap = new mutable.HashMap[String, List[(Long, Long)]] val nonConsecutivePairsForLogFilesMap = new mutable.HashMap[String, List[(Long, Long)]] @@ -62,7 +74,7 @@ object DumpLogSegments { val file = new File(arg) if(file.getName.endsWith(Log.LogFileSuffix)) { println("Dumping " + file) - dumpLog(file, print, nonConsecutivePairsForLogFilesMap, isDeepIteration) + dumpLog(file, print, nonConsecutivePairsForLogFilesMap, isDeepIteration, maxMessageSize , valueDecoder, keyDecoder) } else if(file.getName.endsWith(Log.IndexFileSuffix)) { println("Dumping " + file) dumpIndex(file, verifyOnly, misMatchesForIndexFilesMap, maxMessageSize) @@ -92,8 +104,7 @@ object DumpLogSegments { misMatchesForIndexFilesMap: mutable.HashMap[String, List[(Long, Long)]], maxMessageSize: Int) { val startOffset = file.getName().split("\\.")(0).toLong - val logFileName = file.getAbsolutePath.split("\\.")(0) + Log.LogFileSuffix - val logFile = new File(logFileName) + val logFile = new File(file.getAbsoluteFile.getParent, file.getName.split("\\.")(0) + Log.LogFileSuffix) val messageSet = new FileMessageSet(logFile, false) val index = new OffsetIndex(file = file, baseOffset = startOffset) for(i <- 0 until index.entries) { @@ -117,13 +128,17 @@ object DumpLogSegments { private def dumpLog(file: File, printContents: Boolean, nonConsecutivePairsForLogFilesMap: mutable.HashMap[String, List[(Long, Long)]], - isDeepIteration: Boolean) { + isDeepIteration: Boolean, + maxMessageSize: Int, + valueDecoder: Decoder[_], + keyDecoder: Decoder[_]) { val startOffset = file.getName().split("\\.")(0).toLong println("Starting offset: " + startOffset) val messageSet = new FileMessageSet(file, false) var validBytes = 0L var lastOffset = -1l - for(shallowMessageAndOffset <- messageSet) { // this only does shallow iteration + val shallowIterator = messageSet.iterator(maxMessageSize) + for(shallowMessageAndOffset <- shallowIterator) { // this only does shallow iteration val itr = getIterator(shallowMessageAndOffset, isDeepIteration) for (messageAndOffset <- itr) { val msg = messageAndOffset.message @@ -145,8 +160,8 @@ object DumpLogSegments { print(" keysize: " + msg.keySize) if(printContents) { if(msg.hasKey) - print(" key: " + Utils.readString(messageAndOffset.message.key, "UTF-8")) - val payload = if(messageAndOffset.message.isNull) null else 
Utils.readString(messageAndOffset.message.payload, "UTF-8") + print(" key: " + keyDecoder.fromBytes(Utils.readBytes(messageAndOffset.message.key))) + val payload = if(messageAndOffset.message.isNull) null else valueDecoder.fromBytes(Utils.readBytes(messageAndOffset.message.payload)) print(" payload: " + payload) } println() @@ -184,4 +199,5 @@ object DumpLogSegments { } } } + } diff --git a/core/src/main/scala/kafka/tools/ExportZkOffsets.scala b/core/src/main/scala/kafka/tools/ExportZkOffsets.scala index 005231f38dd9c..4d051bc2db12f 100644 --- a/core/src/main/scala/kafka/tools/ExportZkOffsets.scala +++ b/core/src/main/scala/kafka/tools/ExportZkOffsets.scala @@ -19,7 +19,7 @@ package kafka.tools import java.io.FileWriter import joptsimple._ -import kafka.utils.{Logging, ZkUtils, ZKStringSerializer,ZKGroupTopicDirs} +import kafka.utils.{Logging, ZkUtils, ZKStringSerializer, ZKGroupTopicDirs, CommandLineUtils} import org.I0Itec.zkclient.ZkClient @@ -55,6 +55,9 @@ object ExportZkOffsets extends Logging { .withRequiredArg() .ofType(classOf[String]) parser.accepts("help", "Print this message.") + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "Export consumer offsets to an output file.") val options = parser.parse(args : _*) @@ -63,13 +66,7 @@ object ExportZkOffsets extends Logging { System.exit(0) } - for (opt <- List(zkConnectOpt, outFileOpt)) { - if (!options.has(opt)) { - System.err.println("Missing required argument: %s".format(opt)) - parser.printHelpOn(System.err) - System.exit(1) - } - } + CommandLineUtils.checkRequiredArgs(parser, options, zkConnectOpt, outFileOpt) val zkConnect = options.valueOf(zkConnectOpt) val groups = options.valuesOf(groupOpt) diff --git a/core/src/main/scala/kafka/tools/GetOffsetShell.scala b/core/src/main/scala/kafka/tools/GetOffsetShell.scala index fba652e3716a6..3d9293e4abbe3 100644 --- a/core/src/main/scala/kafka/tools/GetOffsetShell.scala +++ b/core/src/main/scala/kafka/tools/GetOffsetShell.scala @@ -23,7 +23,7 @@ import joptsimple._ import kafka.api.{PartitionOffsetRequestInfo, OffsetRequest} import kafka.common.TopicAndPartition import kafka.client.ClientUtils -import kafka.utils.CommandLineUtils +import kafka.utils.{ToolsUtils, CommandLineUtils} object GetOffsetShell { @@ -57,13 +57,18 @@ object GetOffsetShell { .describedAs("ms") .ofType(classOf[java.lang.Integer]) .defaultsTo(1000) + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "An interactive shell for getting consumer offsets.") val options = parser.parse(args : _*) CommandLineUtils.checkRequiredArgs(parser, options, brokerListOpt, topicOpt, timeOpt) val clientId = "GetOffsetShell" - val metadataTargetBrokers = ClientUtils.parseBrokerList(options.valueOf(brokerListOpt)) + val brokerList = options.valueOf(brokerListOpt) + ToolsUtils.validatePortOrDie(parser, brokerList) + val metadataTargetBrokers = ClientUtils.parseBrokerList(brokerList) val topic = options.valueOf(topicOpt) var partitionList = options.valueOf(partitionOpt) var time = options.valueOf(timeOpt).longValue diff --git a/core/src/main/scala/kafka/tools/ImportZkOffsets.scala b/core/src/main/scala/kafka/tools/ImportZkOffsets.scala index c8023ee60c07b..abe09721b13f7 100644 --- a/core/src/main/scala/kafka/tools/ImportZkOffsets.scala +++ b/core/src/main/scala/kafka/tools/ImportZkOffsets.scala @@ -20,7 +20,7 @@ package kafka.tools import java.io.BufferedReader import java.io.FileReader import joptsimple._ -import kafka.utils.{Logging, ZkUtils,ZKStringSerializer} +import kafka.utils.{Logging, 
ZkUtils,ZKStringSerializer, CommandLineUtils} import org.I0Itec.zkclient.ZkClient @@ -52,6 +52,9 @@ object ImportZkOffsets extends Logging { .withRequiredArg() .ofType(classOf[String]) parser.accepts("help", "Print this message.") + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "Import offsets to zookeeper from files.") val options = parser.parse(args : _*) @@ -60,13 +63,7 @@ object ImportZkOffsets extends Logging { System.exit(0) } - for (opt <- List(inFileOpt)) { - if (!options.has(opt)) { - System.err.println("Missing required argument: %s".format(opt)) - parser.printHelpOn(System.err) - System.exit(1) - } - } + CommandLineUtils.checkRequiredArgs(parser, options, inFileOpt) val zkConnect = options.valueOf(zkConnectOpt) val partitionOffsetFile = options.valueOf(inFileOpt) diff --git a/core/src/main/scala/kafka/tools/JmxTool.scala b/core/src/main/scala/kafka/tools/JmxTool.scala index 747a675455e9a..1d1a120c45ff7 100644 --- a/core/src/main/scala/kafka/tools/JmxTool.scala +++ b/core/src/main/scala/kafka/tools/JmxTool.scala @@ -26,7 +26,7 @@ import joptsimple.OptionParser import scala.collection.JavaConversions._ import scala.collection.mutable import scala.math._ -import kafka.utils.Logging +import kafka.utils.{CommandLineUtils, Logging} object JmxTool extends Logging { @@ -63,6 +63,9 @@ object JmxTool extends Logging { .describedAs("service-url") .ofType(classOf[String]) .defaultsTo("service:jmx:rmi:///jndi/rmi://:9999/jmxrmi") + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "Dump JMX values to standard output.") val options = parser.parse(args : _*) diff --git a/core/src/main/scala/kafka/tools/MirrorMaker.scala b/core/src/main/scala/kafka/tools/MirrorMaker.scala index f0f871c22dea3..53cb16c2949e0 100644 --- a/core/src/main/scala/kafka/tools/MirrorMaker.scala +++ b/core/src/main/scala/kafka/tools/MirrorMaker.scala @@ -17,23 +17,65 @@ package kafka.tools -import joptsimple.OptionParser -import kafka.utils.{Utils, CommandLineUtils, Logging} -import kafka.producer.{KeyedMessage, ProducerConfig, Producer} -import scala.collection.JavaConversions._ -import java.util.concurrent.CountDownLatch -import java.nio.ByteBuffer +import com.yammer.metrics.core._ +import kafka.common.{TopicAndPartition, OffsetAndMetadata} +import kafka.javaapi.consumer.ConsumerRebalanceListener +import kafka.utils._ import kafka.consumer._ import kafka.serializer._ -import collection.mutable.ListBuffer -import kafka.tools.KafkaMigrationTool.{ProducerThread, ProducerDataChannel} -import kafka.javaapi +import kafka.producer.{OldProducer, NewShinyProducer} +import kafka.metrics.KafkaMetricsGroup +import org.apache.kafka.clients.producer.internals.ErrorLoggingCallback +import org.apache.kafka.clients.producer.{RecordMetadata, ProducerRecord} -object MirrorMaker extends Logging { +import scala.collection.JavaConversions._ + +import joptsimple.OptionParser +import java.util.Properties +import java.util.concurrent.atomic.{AtomicInteger, AtomicBoolean} +import java.util.concurrent._ - private var connectors: Seq[ZookeeperConsumerConnector] = null - private var consumerThreads: Seq[MirrorMakerThread] = null - private var producerThreads: ListBuffer[ProducerThread] = null +/** + * The mirror maker consists of three major modules: + * Consumer Threads - The consumer threads consume messages from source Kafka cluster through + * ZookeeperConsumerConnector and put them into corresponding data channel queue based on hash value + * of source topic-partitionId string. 
This guarantees the message order in source partition is + * preserved. + * Producer Threads - Producer threads take messages out of data channel queues and send them to target cluster. Each + * producer thread is bound to one data channel queue, so that the message order is preserved. + * Data Channel - The data channel has multiple queues. The number of queue is same as number of producer threads. + * + * If new producer is used, the offset will be committed based on the new producer's callback. An offset map is + * maintained and updated on each send() callback. A separate offset commit thread will commit the offset periodically. + * @note For mirror maker, MaxInFlightRequests of producer should be set to 1 for producer if the order of the messages + * needs to be preserved. Mirror maker also depends on the in-order delivery to guarantee no data loss. + * We are not force it to be 1 because in some use cases throughput might be important whereas out of order or + * minor data loss is acceptable. + */ +object MirrorMaker extends Logging with KafkaMetricsGroup { + + private var connector: ZookeeperConsumerConnector = null + private var consumerThreads: Seq[ConsumerThread] = null + private var producerThreads: Seq[ProducerThread] = null + private val isShuttingdown: AtomicBoolean = new AtomicBoolean(false) + private var offsetCommitThread: OffsetCommitThread = null + + private val valueFactory = (k: TopicAndPartition) => new Pool[Int, Long] + private val topicPartitionOffsetMap: Pool[TopicAndPartition, Pool[Int, Long]] = + new Pool[TopicAndPartition, Pool[Int,Long]](Some(valueFactory)) + // Track the messages unacked for consumer rebalance + private var numMessageUnacked: AtomicInteger = new AtomicInteger(0) + private var consumerRebalanceListener: MirrorMakerConsumerRebalanceListener = null + // This is to indicate whether the rebalance is going on so the producer callback knows if + // the rebalance latch needs to be pulled. + private var inRebalance: AtomicBoolean = new AtomicBoolean(false) + + private val shutdownMessage : MirrorMakerRecord = new MirrorMakerRecord("shutdown", 0, 0, null, "shutdown".getBytes) + + newGauge("MirrorMaker-Unacked-Messages", + new Gauge[Int] { + def value = numMessageUnacked.get() + }) def main(args: Array[String]) { @@ -41,18 +83,21 @@ object MirrorMaker extends Logging { val parser = new OptionParser val consumerConfigOpt = parser.accepts("consumer.config", - "Consumer config to consume from a source cluster. 
" + - "You may specify multiple of these.") + "Embedded consumer config for consuming from the source cluster.") .withRequiredArg() .describedAs("config file") .ofType(classOf[String]) + // Please see note about MaxInflightRequests val producerConfigOpt = parser.accepts("producer.config", "Embedded producer config.") .withRequiredArg() .describedAs("config file") .ofType(classOf[String]) + val useNewProducerOpt = parser.accepts("new.producer", + "Use the new producer implementation.") + val numProducersOpt = parser.accepts("num.producers", "Number of producer instances") .withRequiredArg() @@ -67,11 +112,19 @@ object MirrorMaker extends Logging { .ofType(classOf[java.lang.Integer]) .defaultsTo(1) - val bufferSizeOpt = parser.accepts("queue.size", "Number of messages that are buffered between the consumer and producer") + val bufferSizeOpt = parser.accepts("queue.size", + "Number of messages that are buffered between the consumer and producer") .withRequiredArg() .describedAs("Queue size in terms of number of messages") .ofType(classOf[java.lang.Integer]) - .defaultsTo(10000); + .defaultsTo(10000) + + val bufferByteSizeOpt = parser.accepts("queue.byte.size", + "Maximum bytes that can be buffered in each data channel queue") + .withRequiredArg() + .describedAs("Data channel queue size in terms of number of bytes") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(100000000) val whitelistOpt = parser.accepts("whitelist", "Whitelist of topics to mirror.") @@ -80,12 +133,22 @@ object MirrorMaker extends Logging { .ofType(classOf[String]) val blacklistOpt = parser.accepts("blacklist", - "Blacklist of topics to mirror.") - .withRequiredArg() - .describedAs("Java regex (String)") - .ofType(classOf[String]) + "Blacklist of topics to mirror.") + .withRequiredArg() + .describedAs("Java regex (String)") + .ofType(classOf[String]) + + val offsetCommitIntervalMsOpt = parser.accepts("offset.commit.interval.ms", + "Offset commit interval in ms") + .withRequiredArg() + .describedAs("offset commit interval in millisecond") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(60000) val helpOpt = parser.accepts("help", "Print this message.") + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "Continuously copy data between two Kafka clusters.") val options = parser.parse(args : _*) @@ -100,26 +163,60 @@ object MirrorMaker extends Logging { System.exit(1) } - val numStreams = options.valueOf(numStreamsOpt) + val numProducers = options.valueOf(numProducersOpt).intValue() + val numStreams = options.valueOf(numStreamsOpt).intValue() val bufferSize = options.valueOf(bufferSizeOpt).intValue() + val bufferByteSize = options.valueOf(bufferByteSizeOpt).intValue() + val offsetCommitIntervalMs = options.valueOf(offsetCommitIntervalMsOpt).intValue() + + // create consumer connector + val consumerConfigProps = Utils.loadProps(options.valuesOf(consumerConfigOpt).head) + val consumerConfig = new ConsumerConfig(consumerConfigProps) + connector = new ZookeeperConsumerConnector(consumerConfig) + + // create a data channel btw the consumers and the producers + val mirrorDataChannel = new DataChannel(bufferSize, bufferByteSize, numInputs = numStreams, numOutputs = numProducers) + + // set consumer rebalance listener + // Customized consumer rebalance listener should extend MirrorMakerConsumerRebalanceListener + // and take datachannel as argument. 
+ val customRebalanceListenerClass = consumerConfigProps.getProperty("consumer.rebalance.listener") + consumerRebalanceListener = { + if (customRebalanceListenerClass == null) { + new MirrorMakerConsumerRebalanceListener(mirrorDataChannel) + } else + Utils.createObject[MirrorMakerConsumerRebalanceListener](customRebalanceListenerClass, mirrorDataChannel) + } + connector.setConsumerRebalanceListener(consumerRebalanceListener) - val producers = (1 to options.valueOf(numProducersOpt).intValue()).map(_ => { - val props = Utils.loadProps(options.valueOf(producerConfigOpt)) - val config = props.getProperty("partitioner.class") match { - case null => - new ProducerConfig(props) { - override val partitionerClass = "kafka.producer.ByteArrayPartitioner" - } - case pClass : String => - new ProducerConfig(props) - } - new Producer[Array[Byte], Array[Byte]](config) + // create producer threads + val useNewProducer = options.has(useNewProducerOpt) + val producerProps = Utils.loadProps(options.valueOf(producerConfigOpt)) + val clientId = producerProps.getProperty("client.id", "") + producerThreads = (0 until numProducers).map(i => { + producerProps.setProperty("client.id", clientId + "-" + i) + val producer = + if (useNewProducer) + new MirrorMakerNewProducer(producerProps) + else + new MirrorMakerOldProducer(producerProps) + new ProducerThread(mirrorDataChannel, producer, i) }) - connectors = options.valuesOf(consumerConfigOpt).toList - .map(cfg => new ConsumerConfig(Utils.loadProps(cfg.toString))) - .map(new ZookeeperConsumerConnector(_)) + // create offset commit thread + if (useNewProducer) { + /** + * The offset commit thread periodically commit consumed offsets to the source cluster. With the new producer, + * the offsets are updated upon the returned future metadata of the send() call; with the old producer, + * the offsets are updated upon the consumer's iterator advances. By doing this, it is guaranteed no data + * loss even when mirror maker is uncleanly shutdown with the new producer, while with the old producer + * messages inside the data channel could be lost upon mirror maker unclean shutdown. 
+ */ + offsetCommitThread = new OffsetCommitThread(offsetCommitIntervalMs) + offsetCommitThread.start() + } + // create consumer threads val filterSpec = if (options.has(whitelistOpt)) new Whitelist(options.valueOf(whitelistOpt)) else @@ -127,18 +224,14 @@ object MirrorMaker extends Logging { var streams: Seq[KafkaStream[Array[Byte], Array[Byte]]] = Nil try { - streams = connectors.map(_.createMessageStreamsByFilter(filterSpec, numStreams.intValue(), new DefaultDecoder(), new DefaultDecoder())).flatten + streams = connector.createMessageStreamsByFilter(filterSpec, numStreams, new DefaultDecoder(), new DefaultDecoder()) } catch { case t: Throwable => fatal("Unable to create stream - shutting down mirror maker.") - connectors.foreach(_.shutdown) + connector.shutdown() } - - val producerDataChannel = new ProducerDataChannel[KeyedMessage[Array[Byte], Array[Byte]]](bufferSize); - - consumerThreads = streams.zipWithIndex.map(streamAndIndex => new MirrorMakerThread(streamAndIndex._1, producerDataChannel, producers, streamAndIndex._2)) - - producerThreads = new ListBuffer[ProducerThread]() + consumerThreads = streams.zipWithIndex.map(streamAndIndex => new ConsumerThread(streamAndIndex._1, mirrorDataChannel, streamAndIndex._2)) + assert(consumerThreads.size == numStreams) Runtime.getRuntime.addShutdownHook(new Thread() { override def run() { @@ -146,79 +239,383 @@ object MirrorMaker extends Logging { } }) - // create producer threads - var i: Int = 1 - for(producer <- producers) { - val producerThread: KafkaMigrationTool.ProducerThread = new KafkaMigrationTool.ProducerThread(producerDataChannel, - new javaapi.producer.Producer[Array[Byte], Array[Byte]](producer), i) - producerThreads += producerThread - i += 1 - } - consumerThreads.foreach(_.start) producerThreads.foreach(_.start) - // in case the consumer threads hit a timeout/other exception - consumerThreads.foreach(_.awaitShutdown) - cleanShutdown() + // we wait on producer's shutdown latch instead of consumers + // since the consumer threads can hit a timeout/other exception; + // but in this case the producer should still be able to shutdown + // based on the shutdown message in the channel + producerThreads.foreach(_.awaitShutdown()) } def cleanShutdown() { - if (connectors != null) connectors.foreach(_.shutdown) - if (consumerThreads != null) consumerThreads.foreach(_.awaitShutdown) - if (producerThreads != null) { - producerThreads.foreach(_.shutdown) - producerThreads.foreach(_.awaitShutdown) + if (isShuttingdown.compareAndSet(false, true)) { + info("Start clean shutdown.") + // Consumer threads will exit when isCleanShutdown is set. + info("Shutting down consumer threads.") + if (consumerThreads != null) { + consumerThreads.foreach(_.shutdown()) + consumerThreads.foreach(_.awaitShutdown()) + } + // After consumer threads exit, shutdown producer. + info("Shutting down producer threads.") + if (producerThreads != null) { + producerThreads.foreach(_.shutdown()) + producerThreads.foreach(_.awaitShutdown()) + } + // offset commit thread should only be shutdown after producer threads are shutdown, so we don't lose offsets. + info("Shutting down offset commit thread.") + if (offsetCommitThread != null) { + offsetCommitThread.shutdown() + offsetCommitThread.awaitShutdown() + } + // connector can only be shutdown after offsets are committed. 
+ info("Shutting down consumer connectors.") + if (connector != null) + connector.shutdown() + info("Kafka mirror maker shutdown successfully") } - info("Kafka mirror maker shutdown successfully") } - class MirrorMakerThread(stream: KafkaStream[Array[Byte], Array[Byte]], - producerDataChannel: ProducerDataChannel[KeyedMessage[Array[Byte], Array[Byte]]], - producers: Seq[Producer[Array[Byte], Array[Byte]]], - threadId: Int) - extends Thread with Logging { + class DataChannel(messageCapacity: Int, byteCapacity: Int, numInputs: Int, numOutputs: Int) + extends KafkaMetricsGroup { + + val queues = new Array[ByteBoundedBlockingQueue[MirrorMakerRecord]](numOutputs) + val channelSizeHists = new Array[Histogram](numOutputs) + val channelByteSizeHists = new Array[Histogram](numOutputs) + val sizeFunction = (record: MirrorMakerRecord) => record.size + for (i <- 0 until numOutputs) { + queues(i) = new ByteBoundedBlockingQueue[MirrorMakerRecord](messageCapacity, byteCapacity, Some(sizeFunction)) + channelSizeHists(i) = newHistogram("MirrorMaker-DataChannel-queue-%d-NumMessages".format(i)) + channelByteSizeHists(i) = newHistogram("MirrorMaker-DataChannel-queue-%d-Bytes".format(i)) + } + private val channelRecordSizeHist = newHistogram("MirrorMaker-DataChannel-Record-Size") + + // We use a single meter for aggregated wait percentage for the data channel. + // Since meter is calculated as total_recorded_value / time_window and + // time_window is independent of the number of threads, each recorded wait + // time should be discounted by # threads. + private val waitPut = newMeter("MirrorMaker-DataChannel-WaitOnPut", "percent", TimeUnit.NANOSECONDS) + private val waitTake = newMeter("MirrorMaker-DataChannel-WaitOnTake", "percent", TimeUnit.NANOSECONDS) + + def put(record: MirrorMakerRecord) { + // Use hash of source topic-partition to decide which queue to put the message in. The benefit is that + // we can maintain the message order for both keyed and non-keyed messages. 
+ val queueId = + Utils.abs(java.util.Arrays.hashCode((record.sourceTopic + record.sourcePartition).toCharArray)) % numOutputs + put(record, queueId) + } + + def put(record: MirrorMakerRecord, queueId: Int) { + val queue = queues(queueId) + + var putSucceed = false + while (!putSucceed) { + val startPutTime = SystemTime.nanoseconds + putSucceed = queue.offer(record, 500, TimeUnit.MILLISECONDS) + waitPut.mark((SystemTime.nanoseconds - startPutTime) / numInputs) + } + channelSizeHists(queueId).update(queue.size()) + channelByteSizeHists(queueId).update(queue.byteSize()) + channelRecordSizeHist.update(sizeFunction(record)) + } + + def take(queueId: Int): MirrorMakerRecord = { + val queue = queues(queueId) + var data: MirrorMakerRecord = null + while (data == null) { + val startTakeTime = SystemTime.nanoseconds + data = queue.poll(500, TimeUnit.MILLISECONDS) + waitTake.mark((SystemTime.nanoseconds - startTakeTime) / numOutputs) + } + channelSizeHists(queueId).update(queue.size()) + channelByteSizeHists(queueId).update(queue.byteSize()) + data + } + + def clear() { + queues.foreach(queue => queue.clear()) + } + } + + class ConsumerThread(stream: KafkaStream[Array[Byte], Array[Byte]], + mirrorDataChannel: DataChannel, + threadId: Int) + extends Thread with Logging with KafkaMetricsGroup { private val shutdownLatch = new CountDownLatch(1) - private val threadName = "mirrormaker-" + threadId + private val threadName = "mirrormaker-consumer-" + threadId this.logIdent = "[%s] ".format(threadName) + private var shutdownFlag: Boolean = false this.setName(threadName) override def run() { - info("Starting mirror maker thread " + threadName) + info("Starting mirror maker consumer thread " + threadName) try { - for (msgAndMetadata <- stream) { - // If the key of the message is empty, put it into the universal channel - // Otherwise use a pre-assigned producer to send the message - if (msgAndMetadata.key == null) { - trace("Send the non-keyed message the producer channel.") - val pd = new KeyedMessage[Array[Byte], Array[Byte]](msgAndMetadata.topic, msgAndMetadata.message) - producerDataChannel.sendRequest(pd) - } else { - val producerId = Utils.abs(java.util.Arrays.hashCode(msgAndMetadata.key)) % producers.size() - trace("Send message with key %s to producer %d.".format(java.util.Arrays.toString(msgAndMetadata.key), producerId)) - val producer = producers(producerId) - val pd = new KeyedMessage[Array[Byte], Array[Byte]](msgAndMetadata.topic, msgAndMetadata.key, msgAndMetadata.message) - producer.send(pd) - } + val iter = stream.iterator() + while (!shutdownFlag && iter.hasNext()) { + val msgAndMetadata = iter.next() + val data = new MirrorMakerRecord(msgAndMetadata.topic, + msgAndMetadata.partition, + msgAndMetadata.offset, + msgAndMetadata.key(), + msgAndMetadata.message()) + mirrorDataChannel.put(data) } } catch { - case e: Throwable => + case e: Throwable => { fatal("Stream unexpectedly exited.", e) + } } finally { shutdownLatch.countDown() - info("Stopped thread.") + info("Consumer thread stopped") + + // If it exits accidentally, stop the entire mirror maker. + if (!isShuttingdown.get()) { + fatal("Consumer thread exited abnormally, stopping the whole mirror maker.") + System.exit(-1) + } } } + def shutdown() { + shutdownFlag = true + } + def awaitShutdown() { try { shutdownLatch.await() + info("Consumer thread shutdown complete") } catch { - case e: InterruptedException => fatal("Shutdown of thread %s interrupted. 
This might leak data!".format(threadName)) + case e: InterruptedException => fatal("Shutdown of the consumer thread interrupted. This might leak data!") } } } + + class ProducerThread (val dataChannel: DataChannel, + val producer: MirrorMakerBaseProducer, + val threadId: Int) extends Thread with Logging with KafkaMetricsGroup { + private val threadName = "mirrormaker-producer-" + threadId + private val shutdownComplete: CountDownLatch = new CountDownLatch(1) + this.logIdent = "[%s] ".format(threadName) + + setName(threadName) + + override def run() { + info("Starting mirror maker producer thread " + threadName) + try { + while (true) { + val data: MirrorMakerRecord = dataChannel.take(threadId) + trace("Sending message with value size %d".format(data.value.size)) + if(data eq shutdownMessage) { + info("Received shutdown message") + return + } + producer.send(new TopicAndPartition(data.sourceTopic, data.sourcePartition), + data.sourceOffset, + data.key, + data.value) + } + } catch { + case t: Throwable => + fatal("Producer thread failure due to ", t) + } finally { + shutdownComplete.countDown() + info("Producer thread stopped") + // if it exits accidentally, stop the entire mirror maker + if (!isShuttingdown.get()) { + fatal("Producer thread exited abnormally, stopping the whole mirror maker.") + System.exit(-1) + } + } + } + + def shutdown() { + try { + info("Producer thread " + threadName + " shutting down") + dataChannel.put(shutdownMessage, threadId) + } + catch { + case ie: InterruptedException => { + warn("Interrupt during shutdown of ProducerThread") + } + } + } + + def awaitShutdown() { + try { + shutdownComplete.await() + producer.close() + info("Producer thread shutdown complete") + } catch { + case ie: InterruptedException => { + warn("Shutdown of the producer thread interrupted") + } + } + } + } + + class OffsetCommitThread(commitIntervalMs: Int) extends Thread with Logging with KafkaMetricsGroup { + private val threadName = "mirrormaker-offset-commit-thread" + private val shutdownComplete: CountDownLatch = new CountDownLatch(1) + this.logIdent = "[%s]".format(threadName) + var shutdownFlag: Boolean = false + var commitCounter: Int = 0 + + this.setName(threadName) + + newGauge("MirrorMaker-Offset-Commit-Counter", + new Gauge[Int] { + def value = commitCounter + }) + + /** + * Use the connector to commit all the offsets. 
+ */ + override def run() { + info("Starting mirror maker offset commit thread") + try { + while (!shutdownFlag) { + Thread.sleep(commitIntervalMs) + commitOffset() + } + } catch { + case t: Throwable => fatal("Exits due to", t) + } finally { + swallow(commitOffset()) + shutdownComplete.countDown() + info("Offset commit thread exited") + if (!isShuttingdown.get()) { + fatal("Offset commit thread exited abnormally, stopping the whole mirror maker.") + System.exit(-1) + } + } + } + + def commitOffset() { + val offsetsToCommit = collection.immutable.Map(topicPartitionOffsetMap.map { + case (topicPartition, partitionOffsetMap) => + topicPartition -> OffsetAndMetadata(getOffsetToCommit(partitionOffsetMap), null) + }.toSeq: _*) + trace("committing offset: %s".format(offsetsToCommit)) + if (connector == null) { + warn("No consumer connector available to commit offset.") + } else { + connector.commitOffsets( + isAutoCommit = false, + topicPartitionOffsets = offsetsToCommit + ) + commitCounter += 1 + } + } + + private def getOffsetToCommit(offsetsMap: Pool[Int, Long]): Long = { + val offsets = offsetsMap.map(_._2).toSeq.sorted + val iter = offsets.iterator + var offsetToCommit = iter.next() + while (iter.hasNext && offsetToCommit + 1 == iter.next()) + offsetToCommit += 1 + // The committed offset will be the first offset of un-consumed message, hence we need to increment by one. + offsetToCommit + 1 + } + + def shutdown() { + shutdownFlag = true + } + + def awaitShutdown() { + try { + shutdownComplete.await() + info("Offset commit thread shutdown complete") + } catch { + case ie: InterruptedException => { + warn("Shutdown of the offset commit thread interrupted") + } + } + } + } + + private[kafka] trait MirrorMakerBaseProducer { + def send(topicPartition: TopicAndPartition, offset: Long, key: Array[Byte], value: Array[Byte]) + def close() + } + + private class MirrorMakerNewProducer (val producerProps: Properties) + extends NewShinyProducer(producerProps) with MirrorMakerBaseProducer { + + override def send(topicPartition: TopicAndPartition, offset: Long, key: Array[Byte], value: Array[Byte]) { + val record = new ProducerRecord[Array[Byte],Array[Byte]](topicPartition.topic, key, value) + if(sync) { + topicPartitionOffsetMap.getAndMaybePut(topicPartition).put(this.producer.send(record).get().partition(), offset) + } else { + this.producer.send(record, + new MirrorMakerProducerCallback(topicPartition, offset, key, value)) + numMessageUnacked.incrementAndGet() + } + } + } + + private class MirrorMakerOldProducer (val producerProps: Properties) + extends OldProducer(producerProps) with MirrorMakerBaseProducer { + + override def send(topicPartition: TopicAndPartition, offset: Long, key: Array[Byte], value: Array[Byte]) { + super.send(topicPartition.topic, key, value) + } + + override def close() { + super.close() + } + } + + private class MirrorMakerProducerCallback (val topicPartition: TopicAndPartition, + val offset: Long, + val key: Array[Byte], + val value: Array[Byte]) + extends ErrorLoggingCallback(topicPartition.topic, key, value, false) { + + override def onCompletion(metadata: RecordMetadata, exception: Exception) { + if (exception != null) { + // Use default call back to log error + super.onCompletion(metadata, exception) + } else { + trace("updating offset:[%s] -> %d".format(topicPartition, offset)) + topicPartitionOffsetMap.getAndMaybePut(topicPartition).put(metadata.partition(), offset) + } + // Notify the rebalance callback only when all the messages handed to producer are acked. 
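A standalone restatement of the getOffsetToCommit logic above, with made-up offsets: the value committed is one past the largest consecutive acked offset, so an unacked offset (a gap) holds the commit back rather than risking data loss on an unclean shutdown.

    def offsetToCommit(ackedOffsets: Seq[Long]): Long = {
      val sorted = ackedOffsets.sorted
      var highest = sorted.head
      var i = 1
      // walk forward only while the offsets stay consecutive; stop at the first gap
      while (i < sorted.length && sorted(i) == highest + 1) { highest = sorted(i); i += 1 }
      highest + 1  // the committed offset is the next offset to be consumed
    }

    // acked: 5, 6, 7 and 9 (8 still unacked) => commit 8
    assert(offsetToCommit(Seq(5L, 6L, 7L, 9L)) == 8L)
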
+ // There is a very slight chance that 1 message is held by producer thread and not handed to producer. + // That message might have duplicate. We are not handling that here. + if (numMessageUnacked.decrementAndGet() == 0 && inRebalance.get()) { + inRebalance synchronized {inRebalance.notify()} + } + } + } + + class MirrorMakerConsumerRebalanceListener (dataChannel: DataChannel) extends ConsumerRebalanceListener { + + override def beforeReleasingPartitions(partitionOwnership: java.util.Map[String, java.util.Set[java.lang.Integer]]) { + info("Clearing data channel.") + dataChannel.clear() + info("Waiting until all the messages are acked.") + inRebalance synchronized { + inRebalance.set(true) + while (numMessageUnacked.get() > 0) + inRebalance.wait() + } + info("Committing offsets.") + offsetCommitThread.commitOffset() + inRebalance.set(true) + } + } + + private[kafka] class MirrorMakerRecord (val sourceTopic: String, + val sourcePartition: Int, + val sourceOffset: Long, + val key: Array[Byte], + val value: Array[Byte]) { + def size = value.length + {if (key == null) 0 else key.length} + } + } diff --git a/perf/src/main/scala/kafka/perf/PerfConfig.scala b/core/src/main/scala/kafka/tools/PerfConfig.scala similarity index 96% rename from perf/src/main/scala/kafka/perf/PerfConfig.scala rename to core/src/main/scala/kafka/tools/PerfConfig.scala index a8fc6b9ec81c4..d073acf9adf4b 100644 --- a/perf/src/main/scala/kafka/perf/PerfConfig.scala +++ b/core/src/main/scala/kafka/tools/PerfConfig.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package kafka.perf +package kafka.tools import joptsimple.OptionParser @@ -53,7 +53,7 @@ class PerfConfig(args: Array[String]) { .defaultsTo(200) val compressionCodecOpt = parser.accepts("compression-codec", "If set, messages are sent compressed") .withRequiredArg - .describedAs("supported codec: NoCompressionCodec as 0, GZIPCompressionCodec as 1, SnappyCompressionCodec as 2") + .describedAs("supported codec: NoCompressionCodec as 0, GZIPCompressionCodec as 1, SnappyCompressionCodec as 2, LZ4CompressionCodec as 3") .ofType(classOf[java.lang.Integer]) .defaultsTo(0) val helpOpt = parser.accepts("help", "Print usage.") diff --git a/perf/src/main/scala/kafka/perf/ProducerPerformance.scala b/core/src/main/scala/kafka/tools/ProducerPerformance.scala similarity index 55% rename from perf/src/main/scala/kafka/perf/ProducerPerformance.scala rename to core/src/main/scala/kafka/tools/ProducerPerformance.scala index ad2ac26411de8..f2dc4ed2f04f0 100644 --- a/perf/src/main/scala/kafka/perf/ProducerPerformance.scala +++ b/core/src/main/scala/kafka/tools/ProducerPerformance.scala @@ -5,7 +5,7 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -15,20 +15,21 @@ * limitations under the License. 
*/ -package kafka.perf +package kafka.tools + +import kafka.metrics.KafkaMetricsReporter +import kafka.producer.{OldProducer, NewShinyProducer} +import kafka.utils.{ToolsUtils, VerifiableProperties, Logging, CommandLineUtils} +import kafka.message.CompressionCodec +import kafka.serializer._ import java.util.concurrent.{CountDownLatch, Executors} import java.util.concurrent.atomic.AtomicLong -import kafka.producer._ -import org.apache.log4j.Logger -import kafka.message.{CompressionCodec, Message} -import java.text.SimpleDateFormat -import kafka.serializer._ import java.util._ -import collection.immutable.List -import kafka.utils.{VerifiableProperties, Logging} -import kafka.metrics.KafkaMetricsReporter +import java.text.SimpleDateFormat +import java.math.BigInteger +import org.apache.log4j.Logger /** * Load test for the producer @@ -39,7 +40,7 @@ object ProducerPerformance extends Logging { val logger = Logger.getLogger(getClass) val config = new ProducerPerfConfig(args) - if(!config.isFixSize) + if (!config.isFixedSize) logger.info("WARN: Throughput will be slower due to changing message size per request") val totalBytesSent = new AtomicLong(0) @@ -49,84 +50,80 @@ object ProducerPerformance extends Logging { val startMs = System.currentTimeMillis val rand = new java.util.Random - if(!config.hideHeader) - println("start.time, end.time, compression, message.size, batch.size, total.data.sent.in.MB, MB.sec, " + - "total.data.sent.in.nMsg, nMsg.sec") + if (!config.hideHeader) + println("start.time, end.time, compression, message.size, batch.size, total.data.sent.in.MB, MB.sec, " + + "total.data.sent.in.nMsg, nMsg.sec") - for(i <- 0 until config.numThreads) { + for (i <- 0 until config.numThreads) { executor.execute(new ProducerThread(i, config, totalBytesSent, totalMessagesSent, allDone, rand)) } allDone.await() val endMs = System.currentTimeMillis val elapsedSecs = (endMs - startMs) / 1000.0 - val totalMBSent = (totalBytesSent.get * 1.0)/ (1024 * 1024) + val totalMBSent = (totalBytesSent.get * 1.0) / (1024 * 1024) println(("%s, %s, %d, %d, %d, %.2f, %.4f, %d, %.4f").format( config.dateFormat.format(startMs), config.dateFormat.format(endMs), config.compressionCodec.codec, config.messageSize, config.batchSize, totalMBSent, - totalMBSent/elapsedSecs, totalMessagesSent.get, totalMessagesSent.get/elapsedSecs)) + totalMBSent / elapsedSecs, totalMessagesSent.get, totalMessagesSent.get / elapsedSecs)) System.exit(0) } class ProducerPerfConfig(args: Array[String]) extends PerfConfig(args) { val brokerListOpt = parser.accepts("broker-list", "REQUIRED: broker info (the list of broker host and port for bootstrap.") - .withRequiredArg - .describedAs("hostname:port,..,hostname:port") - .ofType(classOf[String]) + .withRequiredArg + .describedAs("hostname:port,..,hostname:port") + .ofType(classOf[String]) val topicsOpt = parser.accepts("topics", "REQUIRED: The comma separated list of topics to produce to") .withRequiredArg .describedAs("topic1,topic2..") .ofType(classOf[String]) val producerRequestTimeoutMsOpt = parser.accepts("request-timeout-ms", "The produce request timeout in ms") - .withRequiredArg() - .ofType(classOf[java.lang.Integer]) - .defaultsTo(3000) + .withRequiredArg() + .ofType(classOf[java.lang.Integer]) + .defaultsTo(3000) val producerNumRetriesOpt = parser.accepts("producer-num-retries", "The producer retries number") - .withRequiredArg() - .ofType(classOf[java.lang.Integer]) - .defaultsTo(3) + .withRequiredArg() + .ofType(classOf[java.lang.Integer]) + .defaultsTo(3) val 
producerRetryBackOffMsOpt = parser.accepts("producer-retry-backoff-ms", "The producer retry backoff time in milliseconds") - .withRequiredArg() - .ofType(classOf[java.lang.Integer]) - .defaultsTo(100) + .withRequiredArg() + .ofType(classOf[java.lang.Integer]) + .defaultsTo(100) val producerRequestRequiredAcksOpt = parser.accepts("request-num-acks", "Number of acks required for producer request " + - "to complete") - .withRequiredArg() - .ofType(classOf[java.lang.Integer]) - .defaultsTo(-1) + "to complete") + .withRequiredArg() + .ofType(classOf[java.lang.Integer]) + .defaultsTo(-1) val varyMessageSizeOpt = parser.accepts("vary-message-size", "If set, message size will vary up to the given maximum.") val syncOpt = parser.accepts("sync", "If set, messages are sent synchronously.") val numThreadsOpt = parser.accepts("threads", "Number of sending threads.") - .withRequiredArg - .describedAs("number of threads") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(1) + .withRequiredArg + .describedAs("number of threads") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(1) val initialMessageIdOpt = parser.accepts("initial-message-id", "The is used for generating test data, If set, messages will be tagged with an " + - "ID and sent by producer starting from this ID sequentially. Message content will be String type and " + - "in the form of 'Message:000...1:xxx...'") - .withRequiredArg() - .describedAs("initial message id") - .ofType(classOf[java.lang.Integer]) + "ID and sent by producer starting from this ID sequentially. Message content will be String type and " + + "in the form of 'Message:000...1:xxx...'") + .withRequiredArg() + .describedAs("initial message id") + .ofType(classOf[java.lang.Integer]) val messageSendGapMsOpt = parser.accepts("message-send-gap-ms", "If set, the send thread will wait for specified time between two sends") - .withRequiredArg() - .describedAs("message send time gap") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(0) + .withRequiredArg() + .describedAs("message send time gap") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(0) val csvMetricsReporterEnabledOpt = parser.accepts("csv-reporter-enabled", "If set, the CSV metrics reporter will be enabled") val metricsDirectoryOpt = parser.accepts("metrics-dir", "If csv-reporter-enable is set, and this parameter is" + - "set, the csv metrics will be outputed here") + "set, the csv metrics will be outputed here") .withRequiredArg .describedAs("metrics dictory") .ofType(classOf[java.lang.String]) + val useNewProducerOpt = parser.accepts("new-producer", "Use the new producer implementation.") + + val options = parser.parse(args: _*) + CommandLineUtils.checkRequiredArgs(parser, options, topicsOpt, brokerListOpt, numMessagesOpt) - val options = parser.parse(args : _*) - for(arg <- List(topicsOpt, brokerListOpt, numMessagesOpt)) { - if(!options.has(arg)) { - System.err.println("Missing required argument \"" + arg + "\"") - parser.printHelpOn(System.err) - System.exit(1) - } - } val topicsStr = options.valueOf(topicsOpt) val topics = topicsStr.split(",") val numMessages = options.valueOf(numMessagesOpt).longValue @@ -134,20 +131,22 @@ object ProducerPerformance extends Logging { val dateFormat = new SimpleDateFormat(options.valueOf(dateFormatOpt)) val hideHeader = options.has(hideHeaderOpt) val brokerList = options.valueOf(brokerListOpt) + ToolsUtils.validatePortOrDie(parser,brokerList) val messageSize = options.valueOf(messageSizeOpt).intValue - var isFixSize = !options.has(varyMessageSizeOpt) + var isFixedSize = 
!options.has(varyMessageSizeOpt) var isSync = options.has(syncOpt) var batchSize = options.valueOf(batchSizeOpt).intValue var numThreads = options.valueOf(numThreadsOpt).intValue val compressionCodec = CompressionCodec.getCompressionCodec(options.valueOf(compressionCodecOpt).intValue) val seqIdMode = options.has(initialMessageIdOpt) var initialMessageId: Int = 0 - if(seqIdMode) + if (seqIdMode) initialMessageId = options.valueOf(initialMessageIdOpt).intValue() val producerRequestTimeoutMs = options.valueOf(producerRequestTimeoutMsOpt).intValue() val producerRequestRequiredAcks = options.valueOf(producerRequestRequiredAcksOpt).intValue() val producerNumRetries = options.valueOf(producerNumRetriesOpt).intValue() val producerRetryBackoffMs = options.valueOf(producerRetryBackOffMsOpt).intValue() + val useNewProducer = options.has(useNewProducerOpt) val csvMetricsReporterEnabled = options.has(csvMetricsReporterEnabledOpt) @@ -168,43 +167,53 @@ object ProducerPerformance extends Logging { } class ProducerThread(val threadId: Int, - val config: ProducerPerfConfig, - val totalBytesSent: AtomicLong, - val totalMessagesSent: AtomicLong, - val allDone: CountDownLatch, - val rand: Random) extends Runnable { - val props = new Properties() - props.put("metadata.broker.list", config.brokerList) - props.put("compression.codec", config.compressionCodec.codec.toString) - props.put("reconnect.interval", Integer.MAX_VALUE.toString) - props.put("send.buffer.bytes", (64*1024).toString) - if(!config.isSync) { - props.put("producer.type","async") - props.put("batch.num.messages", config.batchSize.toString) - props.put("queue.enqueue.timeout.ms", "-1") - } - props.put("client.id", "ProducerPerformance") - props.put("request.required.acks", config.producerRequestRequiredAcks.toString) - props.put("request.timeout.ms", config.producerRequestTimeoutMs.toString) - props.put("message.send.max.retries", config.producerNumRetries.toString) - props.put("retry.backoff.ms", config.producerRetryBackoffMs.toString) - props.put("serializer.class", classOf[DefaultEncoder].getName.toString) - props.put("key.serializer.class", classOf[NullEncoder[Long]].getName.toString) - - - val producerConfig = new ProducerConfig(props) - val producer = new Producer[Long, Array[Byte]](producerConfig) - val seqIdNumDigit = 10 // no. of digits for max int value + val config: ProducerPerfConfig, + val totalBytesSent: AtomicLong, + val totalMessagesSent: AtomicLong, + val allDone: CountDownLatch, + val rand: Random) extends Runnable { + val seqIdNumDigit = 10 // no. 
of digits for max int value val messagesPerThread = config.numMessages / config.numThreads debug("Messages per thread = " + messagesPerThread) + val props = new Properties() + val producer = + if (config.useNewProducer) { + import org.apache.kafka.clients.producer.ProducerConfig + props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.brokerList) + props.put(ProducerConfig.SEND_BUFFER_CONFIG, (64 * 1024).toString) + props.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-performance") + props.put(ProducerConfig.ACKS_CONFIG, config.producerRequestRequiredAcks.toString) + props.put(ProducerConfig.TIMEOUT_CONFIG, config.producerRequestTimeoutMs.toString) + props.put(ProducerConfig.RETRIES_CONFIG, config.producerNumRetries.toString) + props.put(ProducerConfig.RETRY_BACKOFF_MS_CONFIG, config.producerRetryBackoffMs.toString) + props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, config.compressionCodec.name) + new NewShinyProducer(props) + } else { + props.put("metadata.broker.list", config.brokerList) + props.put("compression.codec", config.compressionCodec.codec.toString) + props.put("send.buffer.bytes", (64 * 1024).toString) + if (!config.isSync) { + props.put("producer.type", "async") + props.put("batch.num.messages", config.batchSize.toString) + props.put("queue.enqueue.timeout.ms", "-1") + } + props.put("client.id", "producer-performance") + props.put("request.required.acks", config.producerRequestRequiredAcks.toString) + props.put("request.timeout.ms", config.producerRequestTimeoutMs.toString) + props.put("message.send.max.retries", config.producerNumRetries.toString) + props.put("retry.backoff.ms", config.producerRetryBackoffMs.toString) + props.put("serializer.class", classOf[DefaultEncoder].getName) + props.put("key.serializer.class", classOf[NullEncoder[Long]].getName) + new OldProducer(props) + } // generate the sequential message ID - private val SEP = ":" // message field separator + private val SEP = ":" // message field separator private val messageIdLabel = "MessageID" - private val threadIdLabel = "ThreadID" - private val topicLabel = "Topic" - private var leftPaddedSeqId : String = "" + private val threadIdLabel = "ThreadID" + private val topicLabel = "Topic" + private var leftPaddedSeqId: String = "" private def generateMessageWithSeqId(topic: String, msgId: Long, msgSize: Int): Array[Byte] = { // Each thread gets a unique range of sequential no. for its ids. @@ -213,54 +222,57 @@ object ProducerPerformance extends Logging { // thread 1 IDs : 100 ~ 199 // thread 2 IDs : 200 ~ 299 // . . . 
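A worked example of the per-thread ID ranges illustrated above, with made-up values (topic "test", initial-message-id 0, 100 messages per thread): thread 2's message number 5 gets sequence ID 0 + 100 * 2 + 5 = 205, and the payload is roughly the header below padded out to the requested message size with 'x' characters (the real code pads via String.format and then replaces the spaces):

    val (threadId, seqId, msgSize) = (2, 205L, 64)
    val header = "Topic:test:ThreadID:" + threadId + ":MessageID:" + "%010d".format(seqId) + ":"
    val payload = header + "x" * (msgSize - header.length)
    // => Topic:test:ThreadID:2:MessageID:0000000205:xxxxxxxxx... (padded to 64 chars)
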
- leftPaddedSeqId = String.format("%0"+seqIdNumDigit+"d", long2Long(msgId)) + leftPaddedSeqId = String.format("%0" + seqIdNumDigit + "d", long2Long(msgId)) - val msgHeader = topicLabel + SEP + - topic + SEP + - threadIdLabel + SEP + - threadId + SEP + - messageIdLabel + SEP + - leftPaddedSeqId + SEP + val msgHeader = topicLabel + SEP + + topic + SEP + + threadIdLabel + SEP + + threadId + SEP + + messageIdLabel + SEP + + leftPaddedSeqId + SEP - val seqMsgString = String.format("%1$-"+msgSize+"s", msgHeader).replace(' ', 'x') + val seqMsgString = String.format("%1$-" + msgSize + "s", msgHeader).replace(' ', 'x') debug(seqMsgString) return seqMsgString.getBytes() } - private def generateProducerData(topic: String, messageId: Long): (KeyedMessage[Long, Array[Byte]], Int) = { - val msgSize = if(config.isFixSize) config.messageSize else 1 + rand.nextInt(config.messageSize) - val message = - if(config.seqIdMode) { - val seqId = config.initialMessageId + (messagesPerThread * threadId) + messageId - generateMessageWithSeqId(topic, seqId, msgSize) - } else { - new Array[Byte](msgSize) - } - (new KeyedMessage[Long, Array[Byte]](topic, messageId, message), message.length) + private def generateProducerData(topic: String, messageId: Long): Array[Byte] = { + val msgSize = if (config.isFixedSize) config.messageSize else 1 + rand.nextInt(config.messageSize) + if (config.seqIdMode) { + val seqId = config.initialMessageId + (messagesPerThread * threadId) + messageId + generateMessageWithSeqId(topic, seqId, msgSize) + } else { + new Array[Byte](msgSize) + } } override def run { var bytesSent = 0L var nSends = 0 - var j: Long = 0L - while(j < messagesPerThread) { + var i: Long = 0L + var message: Array[Byte] = null + + while (i < messagesPerThread) { try { config.topics.foreach( topic => { - val (producerData, bytesSent_) = generateProducerData(topic, j) - bytesSent += bytesSent_ - producer.send(producerData) + message = generateProducerData(topic, i) + producer.send(topic, BigInteger.valueOf(i).toByteArray, message) + bytesSent += message.size nSends += 1 - if(config.messageSendGapMs > 0) + if (config.messageSendGapMs > 0) Thread.sleep(config.messageSendGapMs) - } - ) + }) } catch { - case e: Exception => error("Error sending messages", e) + case e: Throwable => error("Error when sending message " + new String(message), e) } - j += 1 + i += 1 + } + try { + producer.close() + } catch { + case e: Throwable => error("Error when closing producer", e) } - producer.close() totalBytesSent.addAndGet(bytesSent) totalMessagesSent.addAndGet(nSends) allDone.countDown() diff --git a/core/src/main/scala/kafka/tools/ReplayLogProducer.scala b/core/src/main/scala/kafka/tools/ReplayLogProducer.scala index 814d61ae477ce..f541987b2876a 100644 --- a/core/src/main/scala/kafka/tools/ReplayLogProducer.scala +++ b/core/src/main/scala/kafka/tools/ReplayLogProducer.scala @@ -20,11 +20,10 @@ package kafka.tools import joptsimple.OptionParser import java.util.concurrent.{Executors, CountDownLatch} import java.util.Properties -import kafka.producer.{KeyedMessage, ProducerConfig, Producer} import kafka.consumer._ -import kafka.utils.{Logging, ZkUtils} +import kafka.utils.{ToolsUtils, CommandLineUtils, Logging, ZkUtils} import kafka.api.OffsetRequest -import kafka.message.CompressionCodec +import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer, ProducerConfig} object ReplayLogProducer extends Logging { @@ -88,17 +87,6 @@ object ReplayLogProducer extends Logging { .describedAs("count") .ofType(classOf[java.lang.Integer]) 
.defaultsTo(-1) - val asyncOpt = parser.accepts("async", "If set, messages are sent asynchronously.") - val delayMSBtwBatchOpt = parser.accepts("delay-btw-batch-ms", "Delay in ms between 2 batch sends.") - .withRequiredArg - .describedAs("ms") - .ofType(classOf[java.lang.Long]) - .defaultsTo(0) - val batchSizeOpt = parser.accepts("batch-size", "Number of messages to send in a single batch.") - .withRequiredArg - .describedAs("batch size") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(200) val numThreadsOpt = parser.accepts("threads", "Number of sending threads.") .withRequiredArg .describedAs("threads") @@ -109,48 +97,34 @@ object ReplayLogProducer extends Logging { .describedAs("size") .ofType(classOf[java.lang.Integer]) .defaultsTo(5000) - val compressionCodecOption = parser.accepts("compression-codec", "If set, messages are sent compressed") + val propertyOpt = parser.accepts("property", "A mechanism to pass properties in the form key=value to the producer. " + + "This allows the user to override producer properties that are not exposed by the existing command line arguments") .withRequiredArg - .describedAs("compression codec ") - .ofType(classOf[java.lang.Integer]) - .defaultsTo(0) + .describedAs("producer properties") + .ofType(classOf[String]) + val syncOpt = parser.accepts("sync", "If set message send requests to the brokers are synchronously, one at a time as they arrive.") val options = parser.parse(args : _*) - for(arg <- List(brokerListOpt, inputTopicOpt)) { - if(!options.has(arg)) { - System.err.println("Missing required argument \"" + arg + "\"") - parser.printHelpOn(System.err) - System.exit(1) - } - } + + CommandLineUtils.checkRequiredArgs(parser, options, brokerListOpt, inputTopicOpt) + val zkConnect = options.valueOf(zkConnectOpt) val brokerList = options.valueOf(brokerListOpt) + ToolsUtils.validatePortOrDie(parser,brokerList) val numMessages = options.valueOf(numMessagesOpt).intValue - val isAsync = options.has(asyncOpt) - val delayedMSBtwSend = options.valueOf(delayMSBtwBatchOpt).longValue - var batchSize = options.valueOf(batchSizeOpt).intValue val numThreads = options.valueOf(numThreadsOpt).intValue val inputTopic = options.valueOf(inputTopicOpt) val outputTopic = options.valueOf(outputTopicOpt) val reportingInterval = options.valueOf(reportingIntervalOpt).intValue - val compressionCodec = CompressionCodec.getCompressionCodec(options.valueOf(compressionCodecOption).intValue) + val isSync = options.has(syncOpt) + import scala.collection.JavaConversions._ + val producerProps = CommandLineUtils.parseKeyValueArgs(options.valuesOf(propertyOpt)) + producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList) } class ZKConsumerThread(config: Config, stream: KafkaStream[Array[Byte], Array[Byte]]) extends Thread with Logging { val shutdownLatch = new CountDownLatch(1) - val props = new Properties() - props.put("metadata.broker.list", config.brokerList) - props.put("reconnect.interval", Integer.MAX_VALUE.toString) - props.put("send.buffer.bytes", (64*1024).toString) - props.put("compression.codec", config.compressionCodec.codec.toString) - props.put("batch.num.messages", config.batchSize.toString) - props.put("queue.enqueue.timeout.ms", "-1") - - if(config.isAsync) - props.put("producer.type", "async") - - val producerConfig = new ProducerConfig(props) - val producer = new Producer[Array[Byte], Array[Byte]](producerConfig) + val producer = new KafkaProducer[Array[Byte],Array[Byte]](config.producerProps) override def run() { info("Starting consumer thread..") @@ 
-163,9 +137,11 @@ object ReplayLogProducer extends Logging { stream for (messageAndMetadata <- iter) { try { - producer.send(new KeyedMessage[Array[Byte], Array[Byte]](config.outputTopic, messageAndMetadata.message)) - if (config.delayedMSBtwSend > 0 && (messageCount + 1) % config.batchSize == 0) - Thread.sleep(config.delayedMSBtwSend) + val response = producer.send(new ProducerRecord[Array[Byte],Array[Byte]](config.outputTopic, + messageAndMetadata.key(), messageAndMetadata.message())) + if(config.isSync) { + response.get() + } messageCount += 1 }catch { case ie: Exception => error("Skipping this message", ie) diff --git a/core/src/main/scala/kafka/tools/ReplicaVerificationTool.scala b/core/src/main/scala/kafka/tools/ReplicaVerificationTool.scala index 5e8c56d963174..ba6ddd7a909df 100644 --- a/core/src/main/scala/kafka/tools/ReplicaVerificationTool.scala +++ b/core/src/main/scala/kafka/tools/ReplicaVerificationTool.scala @@ -92,7 +92,9 @@ object ReplicaVerificationTool extends Logging { .describedAs("ms") .ofType(classOf[java.lang.Long]) .defaultsTo(30 * 1000L) - + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "Validate that all replicas for a set of topics have the same data.") val options = parser.parse(args : _*) CommandLineUtils.checkRequiredArgs(parser, options, brokerListOpt) @@ -114,11 +116,16 @@ object ReplicaVerificationTool extends Logging { val reportInterval = options.valueOf(reportIntervalOpt).longValue // getting topic metadata info("Getting topic metatdata...") - val metadataTargetBrokers = ClientUtils.parseBrokerList(options.valueOf(brokerListOpt)) + val brokerList = options.valueOf(brokerListOpt) + ToolsUtils.validatePortOrDie(parser,brokerList) + val metadataTargetBrokers = ClientUtils.parseBrokerList(brokerList) val topicsMetadataResponse = ClientUtils.fetchTopicMetadata(Set[String](), metadataTargetBrokers, clientId, maxWaitMs) - val brokerMap = topicsMetadataResponse.extractBrokers(topicsMetadataResponse.topicsMetadata) + val brokerMap = topicsMetadataResponse.brokers.map(b => (b.id, b)).toMap val filteredTopicMetadata = topicsMetadataResponse.topicsMetadata.filter( - topicMetadata => if (topicWhiteListFiler.isTopicAllowed(topicMetadata.topic)) true else false + topicMetadata => if (topicWhiteListFiler.isTopicAllowed(topicMetadata.topic, excludeInternalTopics = false)) + true + else + false ) val topicPartitionReplicaList: Seq[TopicPartitionReplica] = filteredTopicMetadata.flatMap( topicMetadataResponse => @@ -299,7 +306,7 @@ private class ReplicaBuffer(expectedReplicasPerTopicAndPartition: Map[TopicAndPa } else isMessageInAllReplicas = false } catch { - case t => + case t: Throwable => throw new RuntimeException("Error in processing replica %d in partition %s at offset %d." .format(replicaId, topicAndPartition, fetchOffsetMap.get(topicAndPartition)), t) } diff --git a/perf/src/main/scala/kafka/perf/SimpleConsumerPerformance.scala b/core/src/main/scala/kafka/tools/SimpleConsumerPerformance.scala similarity index 96% rename from perf/src/main/scala/kafka/perf/SimpleConsumerPerformance.scala rename to core/src/main/scala/kafka/tools/SimpleConsumerPerformance.scala index c52ada0a30ae4..7602b8d705970 100644 --- a/perf/src/main/scala/kafka/perf/SimpleConsumerPerformance.scala +++ b/core/src/main/scala/kafka/tools/SimpleConsumerPerformance.scala @@ -5,7 +5,7 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -15,7 +15,7 @@ * limitations under the License. */ -package kafka.perf +package kafka.tools import java.net.URI import java.text.SimpleDateFormat @@ -74,17 +74,17 @@ object SimpleConsumerPerformance { messagesRead += 1 bytesRead += message.message.payloadSize } - + if(messagesRead == 0 || totalMessagesRead > config.numMessages) done = true else // we only did one fetch so we find the offset for the first (head) messageset offset += messageSet.validBytes - + totalBytesRead += bytesRead totalMessagesRead += messagesRead consumedInterval += messagesRead - + if(consumedInterval > config.reportingInterval) { if(config.showDetailedStats) { val reportTime = System.currentTimeMillis @@ -141,13 +141,8 @@ object SimpleConsumerPerformance { val options = parser.parse(args : _*) - for(arg <- List(topicOpt, urlOpt)) { - if(!options.has(arg)) { - System.err.println("Missing required argument \"" + arg + "\"") - parser.printHelpOn(System.err) - System.exit(1) - } - } + CommandLineUtils.checkRequiredArgs(parser, options, topicOpt, urlOpt) + val url = new URI(options.valueOf(urlOpt)) val fetchSize = options.valueOf(fetchSizeOpt).intValue val fromLatest = options.has(resetBeginningOffsetOpt) diff --git a/core/src/main/scala/kafka/tools/SimpleConsumerShell.scala b/core/src/main/scala/kafka/tools/SimpleConsumerShell.scala index 747e07280cce7..b4f903b6c7c3b 100644 --- a/core/src/main/scala/kafka/tools/SimpleConsumerShell.scala +++ b/core/src/main/scala/kafka/tools/SimpleConsumerShell.scala @@ -93,15 +93,12 @@ object SimpleConsumerShell extends Logging { "skip it instead of halt.") val noWaitAtEndOfLogOpt = parser.accepts("no-wait-at-logend", "If set, when the simple consumer reaches the end of the Log, it will stop, not waiting for new produced messages") + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "A low-level tool for fetching data directly from a particular replica.") val options = parser.parse(args : _*) - for(arg <- List(brokerListOpt, topicOpt, partitionIdOpt)) { - if(!options.has(arg)) { - error("Missing required argument \"" + arg + "\"") - parser.printHelpOn(System.err) - System.exit(1) - } - } + CommandLineUtils.checkRequiredArgs(parser, options, brokerListOpt, topicOpt, partitionIdOpt) val topic = options.valueOf(topicOpt) val partitionId = options.valueOf(partitionIdOpt).intValue() @@ -117,7 +114,7 @@ object SimpleConsumerShell extends Logging { val noWaitAtEndOfLog = options.has(noWaitAtEndOfLogOpt) val messageFormatterClass = Class.forName(options.valueOf(messageFormatterOpt)) - val formatterArgs = MessageFormatter.tryParseFormatterArgs(options.valuesOf(messageFormatterArgOpt)) + val formatterArgs = CommandLineUtils.parseKeyValueArgs(options.valuesOf(messageFormatterArgOpt)) val fetchRequestBuilder = new FetchRequestBuilder() .clientId(clientId) @@ -127,7 +124,9 @@ object SimpleConsumerShell extends Logging { // getting topic metadata info("Getting topic metatdata...") - val metadataTargetBrokers = ClientUtils.parseBrokerList(options.valueOf(brokerListOpt)) + val brokerList = options.valueOf(brokerListOpt) + ToolsUtils.validatePortOrDie(parser,brokerList) + val metadataTargetBrokers = ClientUtils.parseBrokerList(brokerList) val topicsMetadata = ClientUtils.fetchTopicMetadata(Set(topic), metadataTargetBrokers, clientId, maxWaitMs).topicsMetadata if(topicsMetadata.size != 1 || 
!topicsMetadata(0).topic.equals(topic)) { System.err.println(("Error: no valid topic metadata for topic: %s, " + "what we get from server is only: %s").format(topic, topicsMetadata)) diff --git a/core/src/main/scala/kafka/tools/StateChangeLogMerger.scala b/core/src/main/scala/kafka/tools/StateChangeLogMerger.scala index 97970fb941faf..b34b8c7d7d4ca 100644 --- a/core/src/main/scala/kafka/tools/StateChangeLogMerger.scala +++ b/core/src/main/scala/kafka/tools/StateChangeLogMerger.scala @@ -22,7 +22,7 @@ import scala.util.matching.Regex import collection.mutable import java.util.Date import java.text.SimpleDateFormat -import kafka.utils.Logging +import kafka.utils.{Utils, Logging, CommandLineUtils} import kafka.common.Topic import java.io.{BufferedOutputStream, OutputStream} @@ -83,6 +83,9 @@ object StateChangeLogMerger extends Logging { .describedAs("end timestamp in the format " + dateFormat) .ofType(classOf[String]) .defaultsTo("9999-12-31 23:59:59,999") + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "A tool for merging the log files from several brokers to reconnstruct a unified history of what happened.") val options = parser.parse(args : _*) @@ -112,6 +115,11 @@ object StateChangeLogMerger extends Logging { } if (options.has(partitionsOpt)) { partitions = options.valueOf(partitionsOpt).split(",").toList.map(_.toInt) + val duplicatePartitions = Utils.duplicates(partitions) + if (duplicatePartitions.nonEmpty) { + System.err.println("The list of partitions contains repeated entries: %s".format(duplicatePartitions.mkString(","))) + System.exit(1) + } } startDate = dateFormat.parse(options.valueOf(startTimeOpt).replace('\"', ' ').trim) endDate = dateFormat.parse(options.valueOf(endTimeOpt).replace('\"', ' ').trim) diff --git a/core/src/main/scala/kafka/tools/TestEndToEndLatency.scala b/core/src/main/scala/kafka/tools/TestEndToEndLatency.scala new file mode 100644 index 0000000000000..2ebc7bf643ea9 --- /dev/null +++ b/core/src/main/scala/kafka/tools/TestEndToEndLatency.scala @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package kafka.tools + +import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord, KafkaProducer} + +import kafka.consumer._ + +import java.util.Properties +import java.util.Arrays + +object TestEndToEndLatency { + def main(args: Array[String]) { + if (args.length != 6) { + System.err.println("USAGE: java " + getClass().getName + " broker_list zookeeper_connect topic num_messages consumer_fetch_max_wait producer_acks") + System.exit(1) + } + + val brokerList = args(0) + val zkConnect = args(1) + val topic = args(2) + val numMessages = args(3).toInt + val consumerFetchMaxWait = args(4).toInt + val producerAcks = args(5).toInt + + val consumerProps = new Properties() + consumerProps.put("group.id", topic) + consumerProps.put("auto.commit.enable", "false") + consumerProps.put("auto.offset.reset", "largest") + consumerProps.put("zookeeper.connect", zkConnect) + consumerProps.put("fetch.wait.max.ms", consumerFetchMaxWait.toString) + consumerProps.put("socket.timeout.ms", 1201000.toString) + + val config = new ConsumerConfig(consumerProps) + val connector = Consumer.create(config) + val stream = connector.createMessageStreams(Map(topic -> 1)).get(topic).head.head + val iter = stream.iterator + + val producerProps = new Properties() + producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList) + producerProps.put(ProducerConfig.LINGER_MS_CONFIG, "0") + producerProps.put(ProducerConfig.BLOCK_ON_BUFFER_FULL_CONFIG, "true") + producerProps.put(ProducerConfig.ACKS_CONFIG, producerAcks.toString) + val producer = new KafkaProducer[Array[Byte],Array[Byte]](producerProps) + + // make sure the consumer fetcher has started before sending data since otherwise + // the consumption from the tail will skip the first message and hence be blocked + Thread.sleep(5000) + + val message = "hello there beautiful".getBytes + var totalTime = 0.0 + val latencies = new Array[Long](numMessages) + for (i <- 0 until numMessages) { + val begin = System.nanoTime + producer.send(new ProducerRecord[Array[Byte],Array[Byte]](topic, message)) + val received = iter.next + val elapsed = System.nanoTime - begin + // poor man's progress bar + if (i % 1000 == 0) + println(i + "\t" + elapsed / 1000.0 / 1000.0) + totalTime += elapsed + latencies(i) = (elapsed / 1000 / 1000) + } + println("Avg latency: %.4f ms\n".format(totalTime / numMessages / 1000.0 / 1000.0)) + Arrays.sort(latencies) + val p50 = latencies((latencies.length * 0.5).toInt) + val p99 = latencies((latencies.length * 0.99).toInt) + val p999 = latencies((latencies.length * 0.999).toInt) + println("Percentiles: 50th = %d, 99th = %d, 99.9th = %d".format(p50, p99, p999)) + producer.close() + connector.commitOffsets(true) + connector.shutdown() + System.exit(0) + } +} \ No newline at end of file diff --git a/core/src/test/scala/other/kafka/TestLogCleaning.scala b/core/src/main/scala/kafka/tools/TestLogCleaning.scala similarity index 91% rename from core/src/test/scala/other/kafka/TestLogCleaning.scala rename to core/src/main/scala/kafka/tools/TestLogCleaning.scala index 22b16e54980e3..b81010ec0fa98 100644 --- a/core/src/test/scala/other/kafka/TestLogCleaning.scala +++ b/core/src/main/scala/kafka/tools/TestLogCleaning.scala @@ -15,20 +15,18 @@ * limitations under the License. 
*/ -package kafka +package kafka.tools import joptsimple.OptionParser import java.util.Properties import java.util.Random import java.io._ -import scala.io.Source -import scala.io.BufferedSource -import kafka.producer._ import kafka.consumer._ import kafka.serializer._ import kafka.utils._ import kafka.log.FileMessageSet import kafka.log.Log +import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer, ProducerConfig} /** * This is a torture test that runs against an existing broker. Here is how it works: @@ -89,15 +87,15 @@ object TestLogCleaning { val options = parser.parse(args:_*) + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "An integration test for log cleaning.") + if(options.has(dumpOpt)) { dumpLog(new File(options.valueOf(dumpOpt))) System.exit(0) } - if(!options.has(brokerOpt) || !options.has(zkConnectOpt) || !options.has(numMessagesOpt)) { - parser.printHelpOn(System.err) - System.exit(1) - } + CommandLineUtils.checkRequiredArgs(parser, options, brokerOpt, zkConnectOpt, numMessagesOpt) // parse options val messages = options.valueOf(numMessagesOpt).longValue @@ -123,14 +121,14 @@ object TestLogCleaning { val reduction = 1.0 - consumedLines.toDouble/producedLines.toDouble println("%d rows of data produced, %d rows of data consumed (%.1f%% reduction).".format(producedLines, consumedLines, 100 * reduction)) - println("Deduplicating and validating output files...") + println("De-duplicating and validating output files...") validateOutput(producedDataFile, consumedDataFile) producedDataFile.delete() consumedDataFile.delete() } def dumpLog(dir: File) { - require(dir.exists, "Non-existant directory: " + dir.getAbsolutePath) + require(dir.exists, "Non-existent directory: " + dir.getAbsolutePath) for(file <- dir.list.sorted; if file.endsWith(Log.LogFileSuffix)) { val ms = new FileMessageSet(new File(dir, file)) for(entry <- ms) { @@ -242,13 +240,9 @@ object TestLogCleaning { dups: Int, percentDeletes: Int): File = { val producerProps = new Properties - producerProps.setProperty("producer.type", "async") - producerProps.setProperty("broker.list", brokerUrl) - producerProps.setProperty("serializer.class", classOf[StringEncoder].getName) - producerProps.setProperty("key.serializer.class", classOf[StringEncoder].getName) - producerProps.setProperty("queue.enqueue.timeout.ms", "-1") - producerProps.setProperty("batch.size", 1000.toString) - val producer = new Producer[String, String](new ProducerConfig(producerProps)) + producerProps.setProperty(ProducerConfig.BLOCK_ON_BUFFER_FULL_CONFIG, "true") + producerProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerUrl) + val producer = new KafkaProducer[Array[Byte],Array[Byte]](producerProps) val rand = new Random(1) val keyCount = (messages / dups).toInt val producedFile = File.createTempFile("kafka-log-cleaner-produced-", ".txt") @@ -260,9 +254,9 @@ object TestLogCleaning { val delete = i % 100 < percentDeletes val msg = if(delete) - new KeyedMessage[String, String](topic = topic, key = key.toString, message = null) + new ProducerRecord[Array[Byte],Array[Byte]](topic, key.toString.getBytes(), null) else - new KeyedMessage[String, String](topic = topic, key = key.toString, message = i.toString) + new ProducerRecord[Array[Byte],Array[Byte]](topic, key.toString.getBytes(), i.toString.getBytes()) producer.send(msg) producedWriter.write(TestRecord(topic, key, i, delete).toString) producedWriter.newLine() @@ -275,8 +269,9 @@ object TestLogCleaning { def makeConsumer(zkUrl: String, topics: Array[String]): 
ZookeeperConsumerConnector = { val consumerProps = new Properties consumerProps.setProperty("group.id", "log-cleaner-test-" + new Random().nextInt(Int.MaxValue)) - consumerProps.setProperty("zk.connect", zkUrl) - consumerProps.setProperty("consumer.timeout.ms", (10*1000).toString) + consumerProps.setProperty("zookeeper.connect", zkUrl) + consumerProps.setProperty("consumer.timeout.ms", (20*1000).toString) + consumerProps.setProperty("auto.offset.reset", "smallest") new ZookeeperConsumerConnector(new ConsumerConfig(consumerProps)) } diff --git a/core/src/main/scala/kafka/tools/VerifyConsumerRebalance.scala b/core/src/main/scala/kafka/tools/VerifyConsumerRebalance.scala index eac9af2e1d459..aef8361b73a09 100644 --- a/core/src/main/scala/kafka/tools/VerifyConsumerRebalance.scala +++ b/core/src/main/scala/kafka/tools/VerifyConsumerRebalance.scala @@ -19,17 +19,20 @@ package kafka.tools import joptsimple.OptionParser import org.I0Itec.zkclient.ZkClient -import kafka.utils.{Logging, ZKGroupTopicDirs, ZkUtils, ZKStringSerializer} +import kafka.utils.{Logging, ZKGroupTopicDirs, ZkUtils, ZKStringSerializer, CommandLineUtils} object VerifyConsumerRebalance extends Logging { def main(args: Array[String]) { val parser = new OptionParser() val zkConnectOpt = parser.accepts("zookeeper.connect", "ZooKeeper connect string."). - withRequiredArg().defaultsTo("localhost:2181").ofType(classOf[String]); + withRequiredArg().defaultsTo("localhost:2181").ofType(classOf[String]) val groupOpt = parser.accepts("group", "Consumer group."). withRequiredArg().ofType(classOf[String]) parser.accepts("help", "Print this message.") + + if(args.length == 0) + CommandLineUtils.printUsageAndDie(parser, "Validate that all partitions have a consumer for a given consumer group.") val options = parser.parse(args : _*) @@ -38,12 +41,7 @@ object VerifyConsumerRebalance extends Logging { System.exit(0) } - for (opt <- List(groupOpt)) - if (!options.has(opt)) { - System.err.println("Missing required argument: %s".format(opt)) - parser.printHelpOn(System.err) - System.exit(1) - } + CommandLineUtils.checkRequiredArgs(parser, options, groupOpt) val zkConnect = options.valueOf(zkConnectOpt) val group = options.valueOf(groupOpt) @@ -78,7 +76,7 @@ object VerifyConsumerRebalance extends Logging { * This means that for each partition registered under /brokers/topics/[topic]/[broker-id], an owner exists * under /consumers/[consumer_group]/owners/[topic]/[broker_id-partition_id] */ - val consumersPerTopicMap = ZkUtils.getConsumersPerTopic(zkClient, group) + val consumersPerTopicMap = ZkUtils.getConsumersPerTopic(zkClient, group, excludeInternalTopics = false) val partitionsPerTopicMap = ZkUtils.getPartitionsForTopics(zkClient, consumersPerTopicMap.keySet.toSeq) partitionsPerTopicMap.foreach { partitionsForTopic => diff --git a/core/src/main/scala/kafka/tools/ZooKeeperMainWrapper.scala b/core/src/main/scala/kafka/tools/ZooKeeperMainWrapper.scala new file mode 100644 index 0000000000000..4c51f312e9ee8 --- /dev/null +++ b/core/src/main/scala/kafka/tools/ZooKeeperMainWrapper.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.tools + +import org.apache.zookeeper.ZooKeeperMain + +class ZooKeeperMainWrapper(args: Array[String]) extends ZooKeeperMain(args) { + def runCmd(): Unit = { + processCmd(this.cl) + System.exit(0) + } +} + +/** + * ZooKeeper 3.4.6 broke being able to pass commands on command line. + * See ZOOKEEPER-1897. This class is a hack to restore this faclity. + */ +object ZooKeeperMainWrapper { + + def main(args: Array[String]): Unit = { + val main: ZooKeeperMainWrapper = new ZooKeeperMainWrapper(args) + main.runCmd() + } +} diff --git a/core/src/main/scala/kafka/utils/Annotations_2.9+.scala b/core/src/main/scala/kafka/utils/Annotations.scala similarity index 100% rename from core/src/main/scala/kafka/utils/Annotations_2.9+.scala rename to core/src/main/scala/kafka/utils/Annotations.scala diff --git a/core/src/main/scala/kafka/utils/ByteBoundedBlockingQueue.scala b/core/src/main/scala/kafka/utils/ByteBoundedBlockingQueue.scala new file mode 100644 index 0000000000000..26149af943c1d --- /dev/null +++ b/core/src/main/scala/kafka/utils/ByteBoundedBlockingQueue.scala @@ -0,0 +1,230 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.utils + +import java.util.concurrent.atomic.AtomicInteger +import java.util.concurrent.{TimeUnit, LinkedBlockingQueue} + +/** + * A blocking queue that have size limits on both number of elements and number of bytes. + */ +class ByteBoundedBlockingQueue[E] (val queueNumMessageCapacity: Int, val queueByteCapacity: Int, sizeFunction: Option[(E) => Int]) + extends Iterable[E] { + private val queue = new LinkedBlockingQueue[E] (queueNumMessageCapacity) + private var currentByteSize = new AtomicInteger() + private val putLock = new Object + + /** + * Please refer to [[java.util.concurrent.BlockingQueue#offer]] + * An element can be enqueued provided the current size (in number of elements) is within the configured + * capacity and the current size in bytes of the queue is within the configured byte capacity. i.e., the + * element may be enqueued even if adding it causes the queue's size in bytes to exceed the byte capacity. 
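 * (A worked example with assumed numbers: if queueByteCapacity is 1024 and the queue currently holds
 * 1000 bytes, a 500-byte element is still accepted, taking the queue to 1500 bytes; subsequent offers
 * then time out, return false or block, depending on the variant used, until the byte size falls back
 * below 1024.)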
+ * @param e the element to put into the queue + * @param timeout the amount of time to wait before the expire the operation + * @param unit the time unit of timeout parameter, default to millisecond + * @return true if the element is put into queue, false if it is not + * @throws NullPointerException if element is null + * @throws InterruptedException if interrupted during waiting + */ + def offer(e: E, timeout: Long, unit: TimeUnit = TimeUnit.MICROSECONDS): Boolean = { + if (e == null) throw new NullPointerException("Putting null element into queue.") + val startTime = SystemTime.nanoseconds + val expireTime = startTime + unit.toNanos(timeout) + putLock synchronized { + var timeoutNanos = expireTime - SystemTime.nanoseconds + while (currentByteSize.get() >= queueByteCapacity && timeoutNanos > 0) { + // ensure that timeoutNanos > 0, otherwise (per javadoc) we have to wait until the next notify + putLock.wait(timeoutNanos / 1000000, (timeoutNanos % 1000000).toInt) + timeoutNanos = expireTime - SystemTime.nanoseconds + } + // only proceed if queue has capacity and not timeout + timeoutNanos = expireTime - SystemTime.nanoseconds + if (currentByteSize.get() < queueByteCapacity && timeoutNanos > 0) { + val success = queue.offer(e, timeoutNanos, TimeUnit.NANOSECONDS) + // only increase queue byte size if put succeeds + if (success) + currentByteSize.addAndGet(sizeFunction.get(e)) + // wake up another thread in case multiple threads are waiting + if (currentByteSize.get() < queueByteCapacity) + putLock.notify() + success + } else { + false + } + } + } + + /** + * Please refer to [[java.util.concurrent.BlockingQueue#offer]]. + * Put an element to the tail of the queue, return false immediately if queue is full + * @param e The element to put into queue + * @return true on succeed, false on failure + * @throws NullPointerException if element is null + * @throws InterruptedException if interrupted during waiting + */ + def offer(e: E): Boolean = { + if (e == null) throw new NullPointerException("Putting null element into queue.") + putLock synchronized { + if (currentByteSize.get() >= queueByteCapacity) { + false + } else { + val success = queue.offer(e) + if (success) + currentByteSize.addAndGet(sizeFunction.get(e)) + // wake up another thread in case multiple threads are waiting + if (currentByteSize.get() < queueByteCapacity) + putLock.notify() + success + } + } + } + + /** + * Please refer to [[java.util.concurrent.BlockingQueue#put]]. + * Put an element to the tail of the queue, block if queue is full + * @param e The element to put into queue + * @return true on succeed, false on failure + * @throws NullPointerException if element is null + * @throws InterruptedException if interrupted during waiting + */ + def put(e: E): Boolean = { + if (e == null) throw new NullPointerException("Putting null element into queue.") + putLock synchronized { + if (currentByteSize.get() >= queueByteCapacity) + putLock.wait() + val success = queue.offer(e) + if (success) + currentByteSize.addAndGet(sizeFunction.get(e)) + // wake up another thread in case multiple threads are waiting + if (currentByteSize.get() < queueByteCapacity) + putLock.notify() + success + } + } + + /** + * Please refer to [[java.util.concurrent.BlockingQueue#poll]] + * Get an element from the head of queue. Wait for some time if the queue is empty. 
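 * (Usage sketch with assumed values, not code from this patch: for a queue created as
 * new ByteBoundedBlockingQueue[String](10, 1024, Some((s: String) => s.length)),
 * poll(100, TimeUnit.MILLISECONDS) returns the head element once one is available, or null
 * if nothing arrives within 100 ms.)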
+ * @param timeout the amount of time to wait if the queue is empty + * @param unit the unit type + * @return the first element in the queue, null if queue is empty + */ + def poll(timeout: Long, unit: TimeUnit): E = { + val e = queue.poll(timeout, unit) + // only wake up waiting threads if the queue size drop under queueByteCapacity + if (e != null && + currentByteSize.getAndAdd(-sizeFunction.get(e)) > queueByteCapacity && + currentByteSize.get() < queueByteCapacity) + putLock.synchronized(putLock.notify()) + e + } + + /** + * Please refer to [[java.util.concurrent.BlockingQueue#poll]] + * Get an element from the head of queue. + * @return the first element in the queue, null if queue is empty + */ + def poll(): E = { + val e = queue.poll() + // only wake up waiting threads if the queue size drop under queueByteCapacity + if (e != null && + currentByteSize.getAndAdd(-sizeFunction.get(e)) > queueByteCapacity && + currentByteSize.get() < queueByteCapacity) + putLock.synchronized(putLock.notify()) + e + } + + /** + * Please refer to [[java.util.concurrent.BlockingQueue#take]] + * Get an element from the head of the queue, block if the queue is empty + * @return the first element in the queue, null if queue is empty + */ + def take(): E = { + val e = queue.take() + // only wake up waiting threads if the queue size drop under queueByteCapacity + if (currentByteSize.getAndAdd(-sizeFunction.get(e)) > queueByteCapacity && + currentByteSize.get() < queueByteCapacity) + putLock.synchronized(putLock.notify()) + e + } + + /** + * Iterator for the queue + * @return Iterator for the queue + */ + override def iterator = new Iterator[E] () { + private val iter = queue.iterator() + private var curr: E = null.asInstanceOf[E] + + def hasNext: Boolean = iter.hasNext + + def next(): E = { + curr = iter.next() + curr + } + + def remove() { + if (curr == null) + throw new IllegalStateException("Iterator does not have a current element.") + iter.remove() + if (currentByteSize.addAndGet(-sizeFunction.get(curr)) < queueByteCapacity) + putLock.synchronized(putLock.notify()) + } + } + + /** + * get the number of elements in the queue + * @return number of elements in the queue + */ + override def size() = queue.size() + + /** + * get the current byte size in the queue + * @return current queue size in bytes + */ + def byteSize() = { + val currSize = currentByteSize.get() + // There is a potential race where after an element is put into the queue and before the size is added to + // currentByteSize, it was taken out of the queue and the size was deducted from the currentByteSize, + // in that case, currentByteSize would become negative, in that case, just put the queue size to be 0. 
+ if (currSize > 0) currSize else 0 + } + + /** + * get the number of unused slots in the queue + * @return the number of unused slots in the queue + */ + def remainingSize = queue.remainingCapacity() + + /** + * get the remaining bytes capacity of the queue + * @return the remaining bytes capacity of the queue + */ + def remainingByteSize = math.max(0, queueByteCapacity - currentByteSize.get()) + + /** + * remove all the items in the queue + */ + def clear() { + putLock synchronized { + queue.clear() + currentByteSize.set(0) + putLock.notify() + } + } +} diff --git a/core/src/main/scala/kafka/utils/CommandLineUtils.scala b/core/src/main/scala/kafka/utils/CommandLineUtils.scala index 726c30257dcb5..086a62483fad0 100644 --- a/core/src/main/scala/kafka/utils/CommandLineUtils.scala +++ b/core/src/main/scala/kafka/utils/CommandLineUtils.scala @@ -18,31 +18,59 @@ import joptsimple.{OptionSpec, OptionSet, OptionParser} import scala.collection.Set + import java.util.Properties /** * Helper functions for dealing with command line utilities */ object CommandLineUtils extends Logging { + /** + * Check that all the listed options are present + */ def checkRequiredArgs(parser: OptionParser, options: OptionSet, required: OptionSpec[_]*) { for(arg <- required) { - if(!options.has(arg)) { - System.err.println("Missing required argument \"" + arg + "\"") - parser.printHelpOn(System.err) - System.exit(1) - } + if(!options.has(arg)) + printUsageAndDie(parser, "Missing required argument \"" + arg + "\"") } } + /** + * Check that none of the listed options are present + */ def checkInvalidArgs(parser: OptionParser, options: OptionSet, usedOption: OptionSpec[_], invalidOptions: Set[OptionSpec[_]]) { if(options.has(usedOption)) { for(arg <- invalidOptions) { - if(options.has(arg)) { - System.err.println("Option \"" + usedOption + "\" can't be used with option\"" + arg + "\"") - parser.printHelpOn(System.err) - System.exit(1) - } + if(options.has(arg)) + printUsageAndDie(parser, "Option \"" + usedOption + "\" can't be used with option\"" + arg + "\"") + } + } + } + + /** + * Print usage and exit + */ + def printUsageAndDie(parser: OptionParser, message: String) { + System.err.println(message) + parser.printHelpOn(System.err) + System.exit(1) + } + + /** + * Parse key-value pairs in the form key=value + */ + def parseKeyValueArgs(args: Iterable[String]): Properties = { + val splits = args.map(_ split "=").filterNot(_.length == 0) + + val props = new Properties + for(a <- splits) { + if (a.length == 1) props.put(a(0), "") + else if (a.length == 2) props.put(a(0), a(1)) + else { + System.err.println("Invalid command line properties: " + args.mkString(" ")) + System.exit(1) } } + props } } \ No newline at end of file diff --git a/core/src/main/scala/kafka/utils/DelayedItem.scala b/core/src/main/scala/kafka/utils/DelayedItem.scala index d7276494072f1..a4e0dabc858bc 100644 --- a/core/src/main/scala/kafka/utils/DelayedItem.scala +++ b/core/src/main/scala/kafka/utils/DelayedItem.scala @@ -20,7 +20,7 @@ package kafka.utils import java.util.concurrent._ import scala.math._ -class DelayedItem[T](val item: T, delay: Long, unit: TimeUnit) extends Delayed with Logging { +class DelayedItem(delay: Long, unit: TimeUnit) extends Delayed with Logging { val createdMs = SystemTime.milliseconds val delayMs = { @@ -29,8 +29,8 @@ class DelayedItem[T](val item: T, delay: Long, unit: TimeUnit) extends Delayed w else given } - def this(item: T, delayMs: Long) = - this(item, delayMs, TimeUnit.MILLISECONDS) + def this(delayMs: Long) = + 
this(delayMs, TimeUnit.MILLISECONDS) /** * The remaining delay time @@ -41,7 +41,7 @@ class DelayedItem[T](val item: T, delay: Long, unit: TimeUnit) extends Delayed w } def compareTo(d: Delayed): Int = { - val delayed = d.asInstanceOf[DelayedItem[T]] + val delayed = d.asInstanceOf[DelayedItem] val myEnd = createdMs + delayMs val yourEnd = delayed.createdMs + delayed.delayMs diff --git a/core/src/main/scala/kafka/utils/KafkaScheduler.scala b/core/src/main/scala/kafka/utils/KafkaScheduler.scala index 8e3750506c180..9a16343d2ff71 100644 --- a/core/src/main/scala/kafka/utils/KafkaScheduler.scala +++ b/core/src/main/scala/kafka/utils/KafkaScheduler.scala @@ -93,16 +93,14 @@ class KafkaScheduler(val threads: Int, debug("Scheduling task %s with initial delay %d ms and period %d ms." .format(name, TimeUnit.MILLISECONDS.convert(delay, unit), TimeUnit.MILLISECONDS.convert(period, unit))) ensureStarted - val runnable = new Runnable { - def run() = { - try { - trace("Begining execution of scheduled task '%s'.".format(name)) - fun() - } catch { - case t: Throwable => error("Uncaught exception in scheduled task '" + name +"'", t) - } finally { - trace("Completed execution of scheduled task '%s'.".format(name)) - } + val runnable = Utils.runnable { + try { + trace("Begining execution of scheduled task '%s'.".format(name)) + fun() + } catch { + case t: Throwable => error("Uncaught exception in scheduled task '" + name +"'", t) + } finally { + trace("Completed execution of scheduled task '%s'.".format(name)) } } if(period >= 0) diff --git a/core/src/main/scala/kafka/utils/ReplicationUtils.scala b/core/src/main/scala/kafka/utils/ReplicationUtils.scala new file mode 100644 index 0000000000000..715767380f7c2 --- /dev/null +++ b/core/src/main/scala/kafka/utils/ReplicationUtils.scala @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package kafka.utils + +import kafka.api.LeaderAndIsr +import kafka.controller.LeaderIsrAndControllerEpoch +import org.apache.zookeeper.data.Stat +import org.I0Itec.zkclient.ZkClient + +import scala.Some +import scala.collection._ + +object ReplicationUtils extends Logging { + + def updateLeaderAndIsr(zkClient: ZkClient, topic: String, partitionId: Int, newLeaderAndIsr: LeaderAndIsr, controllerEpoch: Int, + zkVersion: Int): (Boolean,Int) = { + debug("Updated ISR for partition [%s,%d] to %s".format(topic, partitionId, newLeaderAndIsr.isr.mkString(","))) + val path = ZkUtils.getTopicPartitionLeaderAndIsrPath(topic, partitionId) + val newLeaderData = ZkUtils.leaderAndIsrZkData(newLeaderAndIsr, controllerEpoch) + // use the epoch of the controller that made the leadership decision, instead of the current controller epoch + ZkUtils.conditionalUpdatePersistentPath(zkClient, path, newLeaderData, zkVersion, Some(checkLeaderAndIsrZkData)) + } + + def checkLeaderAndIsrZkData(zkClient: ZkClient, path: String, expectedLeaderAndIsrInfo: String): (Boolean,Int) = { + try { + val writtenLeaderAndIsrInfo = ZkUtils.readDataMaybeNull(zkClient, path) + val writtenLeaderOpt = writtenLeaderAndIsrInfo._1 + val writtenStat = writtenLeaderAndIsrInfo._2 + val expectedLeader = parseLeaderAndIsr(expectedLeaderAndIsrInfo, path, writtenStat) + writtenLeaderOpt match { + case Some(writtenData) => + val writtenLeader = parseLeaderAndIsr(writtenData, path, writtenStat) + (expectedLeader,writtenLeader) match { + case (Some(expectedLeader),Some(writtenLeader)) => + if(expectedLeader == writtenLeader) + return (true,writtenStat.getVersion()) + case _ => + } + case None => + } + } catch { + case e1: Exception => + } + (false,-1) + } + + def getLeaderIsrAndEpochForPartition(zkClient: ZkClient, topic: String, partition: Int):Option[LeaderIsrAndControllerEpoch] = { + val leaderAndIsrPath = ZkUtils.getTopicPartitionLeaderAndIsrPath(topic, partition) + val leaderAndIsrInfo = ZkUtils.readDataMaybeNull(zkClient, leaderAndIsrPath) + val leaderAndIsrOpt = leaderAndIsrInfo._1 + val stat = leaderAndIsrInfo._2 + leaderAndIsrOpt match { + case Some(leaderAndIsrStr) => parseLeaderAndIsr(leaderAndIsrStr, leaderAndIsrPath, stat) + case None => None + } + } + + private def parseLeaderAndIsr(leaderAndIsrStr: String, path: String, stat: Stat) + : Option[LeaderIsrAndControllerEpoch] = { + Json.parseFull(leaderAndIsrStr) match { + case Some(m) => + val leaderIsrAndEpochInfo = m.asInstanceOf[Map[String, Any]] + val leader = leaderIsrAndEpochInfo.get("leader").get.asInstanceOf[Int] + val epoch = leaderIsrAndEpochInfo.get("leader_epoch").get.asInstanceOf[Int] + val isr = leaderIsrAndEpochInfo.get("isr").get.asInstanceOf[List[Int]] + val controllerEpoch = leaderIsrAndEpochInfo.get("controller_epoch").get.asInstanceOf[Int] + val zkPathVersion = stat.getVersion + debug("Leader %d, Epoch %d, Isr %s, Zk path version %d for leaderAndIsrPath %s".format(leader, epoch, + isr.toString(), zkPathVersion, path)) + Some(LeaderIsrAndControllerEpoch(LeaderAndIsr(leader, epoch, isr, zkPathVersion), controllerEpoch)) + case None => None + } + } + +} diff --git a/core/src/main/scala/kafka/utils/ShutdownableThread.scala b/core/src/main/scala/kafka/utils/ShutdownableThread.scala index cf8adc9f468f4..fc226c863095b 100644 --- a/core/src/main/scala/kafka/utils/ShutdownableThread.scala +++ b/core/src/main/scala/kafka/utils/ShutdownableThread.scala @@ -27,20 +27,29 @@ abstract class ShutdownableThread(val name: String, val isInterruptible: Boolean val isRunning: 
AtomicBoolean = new AtomicBoolean(true) private val shutdownLatch = new CountDownLatch(1) + def shutdown() = { + initiateShutdown() + awaitShutdown() + } - def shutdown(): Unit = { - info("Shutting down") - isRunning.set(false) - if (isInterruptible) - interrupt() - shutdownLatch.await() - info("Shutdown completed") + def initiateShutdown(): Boolean = { + if(isRunning.compareAndSet(true, false)) { + info("Shutting down") + isRunning.set(false) + if (isInterruptible) + interrupt() + true + } else + false } /** - * After calling shutdown(), use this API to wait until the shutdown is complete + * After calling initiateShutdown(), use this API to wait until the shutdown is complete */ - def awaitShutdown(): Unit = shutdownLatch.await() + def awaitShutdown(): Unit = { + shutdownLatch.await() + info("Shutdown completed") + } def doWork(): Unit diff --git a/core/src/main/scala/kafka/utils/Throttler.scala b/core/src/main/scala/kafka/utils/Throttler.scala index c6c3c75ee8408..d1a144d788291 100644 --- a/core/src/main/scala/kafka/utils/Throttler.scala +++ b/core/src/main/scala/kafka/utils/Throttler.scala @@ -17,6 +17,8 @@ package kafka.utils; +import kafka.metrics.KafkaMetricsGroup +import java.util.concurrent.TimeUnit import java.util.Random import scala.math._ @@ -33,14 +35,18 @@ import scala.math._ @threadsafe class Throttler(val desiredRatePerSec: Double, val checkIntervalMs: Long = 100L, - val throttleDown: Boolean = true, - val time: Time = SystemTime) extends Logging { + val throttleDown: Boolean = true, + metricName: String = "throttler", + units: String = "entries", + val time: Time = SystemTime) extends Logging with KafkaMetricsGroup { private val lock = new Object + private val meter = newMeter(metricName, units, TimeUnit.SECONDS) private var periodStartNs: Long = time.nanoseconds private var observedSoFar: Double = 0.0 def maybeThrottle(observed: Double) { + meter.mark(observed.toLong) lock synchronized { observedSoFar += observed val now = time.nanoseconds @@ -72,7 +78,7 @@ object Throttler { def main(args: Array[String]) { val rand = new Random() - val throttler = new Throttler(100000, 100, true, SystemTime) + val throttler = new Throttler(100000, 100, true, time = SystemTime) val interval = 30000 var start = System.currentTimeMillis var total = 0 diff --git a/core/src/main/scala/kafka/utils/ToolsUtils.scala b/core/src/main/scala/kafka/utils/ToolsUtils.scala new file mode 100644 index 0000000000000..fef93929ea03e --- /dev/null +++ b/core/src/main/scala/kafka/utils/ToolsUtils.scala @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ +package kafka.utils + +import joptsimple.OptionParser + +object ToolsUtils { + + def validatePortOrDie(parser: OptionParser, hostPort: String) = { + val hostPorts: Array[String] = if(hostPort.contains(',')) + hostPort.split(",") + else + Array(hostPort) + val validHostPort = hostPorts.filter { + hostPortData => + org.apache.kafka.common.utils.Utils.getPort(hostPortData) != null + } + val isValid = !(validHostPort.isEmpty) && validHostPort.size == hostPorts.length + if(!isValid) + CommandLineUtils.printUsageAndDie(parser, "Please provide valid host:port like host1:9091,host2:9092\n ") + } +} diff --git a/core/src/main/scala/kafka/utils/Utils.scala b/core/src/main/scala/kafka/utils/Utils.scala index a89b0463685e6..738c1af9ef5de 100644 --- a/core/src/main/scala/kafka/utils/Utils.scala +++ b/core/src/main/scala/kafka/utils/Utils.scala @@ -5,7 +5,7 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -21,11 +21,10 @@ import java.io._ import java.nio._ import charset.Charset import java.nio.channels._ -import java.util.concurrent.locks.Lock +import java.util.concurrent.locks.{ReadWriteLock, Lock} import java.lang.management._ import javax.management._ import scala.collection._ -import mutable.ListBuffer import scala.collection.mutable import java.util.Properties import kafka.common.KafkaException @@ -34,10 +33,10 @@ import kafka.common.KafkaStorageException /** * General helper functions! - * + * * This is for general helper functions that aren't specific to Kafka logic. Things that should have been included in - * the standard library etc. - * + * the standard library etc. + * * If you are making a new helper function and want to add it to this class please ensure the following: * 1. It has documentation * 2. 
It is the most general possible utility, not just the thing you needed in one particular place @@ -50,9 +49,9 @@ object Utils extends Logging { * @param fun A function * @return A Runnable that just executes the function */ - def runnable(fun: () => Unit): Runnable = - new Runnable() { - def run() = fun() + def runnable(fun: => Unit): Runnable = + new Runnable { + def run() = fun } /** @@ -69,18 +68,18 @@ object Utils extends Logging { * @param runnable The runnable to execute in the background * @return The unstarted thread */ - def daemonThread(name: String, runnable: Runnable): Thread = + def daemonThread(name: String, runnable: Runnable): Thread = newThread(name, runnable, true) - + /** * Create a daemon thread * @param name The name of the thread * @param fun The runction to execute in the thread * @return The unstarted thread */ - def daemonThread(name: String, fun: () => Unit): Thread = + def daemonThread(name: String, fun: () => Unit): Thread = daemonThread(name, runnable(fun)) - + /** * Create a new thread * @param name The name of the thread @@ -89,16 +88,16 @@ object Utils extends Logging { * @return The unstarted thread */ def newThread(name: String, runnable: Runnable, daemon: Boolean): Thread = { - val thread = new Thread(runnable, name) + val thread = new Thread(runnable, name) thread.setDaemon(daemon) thread.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { def uncaughtException(t: Thread, e: Throwable) { error("Uncaught exception in thread '" + t.getName + "':", e) - } + } }) thread } - + /** * Create a new thread * @param runnable The work for the thread to do @@ -115,7 +114,7 @@ object Utils extends Logging { }) thread } - + /** * Read the given byte buffer into a byte array */ @@ -140,12 +139,18 @@ object Utils extends Logging { * Read a properties file from the given path * @param filename The path of the file to read */ - def loadProps(filename: String): Properties = { - val propStream = new FileInputStream(filename) - val props = new Properties() - props.load(propStream) - props - } + def loadProps(filename: String): Properties = { + val props = new Properties() + var propStream: InputStream = null + try { + propStream = new FileInputStream(filename) + props.load(propStream) + } finally { + if(propStream != null) + propStream.close + } + props + } /** * Open a channel for the given file @@ -156,7 +161,7 @@ object Utils extends Logging { else new FileInputStream(file).getChannel() } - + /** * Do the given action and log any exceptions thrown without rethrowing them * @param log The log method to use for logging. E.g. logger.warn @@ -169,7 +174,7 @@ object Utils extends Logging { case e: Throwable => log(e.getMessage(), e) } } - + /** * Test if two byte buffers are equal. 
In this case equality means having * the same bytes from the current position to the limit @@ -186,7 +191,7 @@ object Utils extends Logging { return false return true } - + /** * Translate the given buffer into a string * @param buffer The buffer to translate @@ -197,7 +202,7 @@ object Utils extends Logging { buffer.get(bytes) new String(bytes, encoding) } - + /** * Print an error message and shutdown the JVM * @param message The error message @@ -206,19 +211,19 @@ object Utils extends Logging { System.err.println(message) System.exit(1) } - + /** * Recursively delete the given file/directory and any subfiles (if any exist) * @param file The root file at which to begin deleting */ def rm(file: String): Unit = rm(new File(file)) - + /** * Recursively delete the list of files/directories and any subfiles (if any exist) * @param a sequence of files to be deleted */ def rm(files: Seq[String]): Unit = files.map(f => rm(new File(f))) - + /** * Recursively delete the given file/directory and any subfiles (if any exist) * @param file The root file at which to begin deleting @@ -237,7 +242,7 @@ object Utils extends Logging { file.delete() } } - + /** * Register the given mbean with the platform mbean server, * unregistering any mbean that was there before. Note, @@ -265,7 +270,7 @@ object Utils extends Logging { } } } - + /** * Unregister the mbean with the given name, if there is one registered * @param name The mbean name to unregister @@ -278,16 +283,16 @@ object Utils extends Logging { mbs.unregisterMBean(objName) } } - + /** - * Read an unsigned integer from the current position in the buffer, + * Read an unsigned integer from the current position in the buffer, * incrementing the position by 4 bytes * @param buffer The buffer to read from * @return The integer read, as a long to avoid signedness */ - def readUnsignedInt(buffer: ByteBuffer): Long = + def readUnsignedInt(buffer: ByteBuffer): Long = buffer.getInt() & 0xffffffffL - + /** * Read an unsigned integer from the given position without modifying the buffers * position @@ -295,33 +300,33 @@ object Utils extends Logging { * @param index the index from which to read the integer * @return The integer read, as a long to avoid signedness */ - def readUnsignedInt(buffer: ByteBuffer, index: Int): Long = + def readUnsignedInt(buffer: ByteBuffer, index: Int): Long = buffer.getInt(index) & 0xffffffffL - + /** * Write the given long value as a 4 byte unsigned integer. Overflow is ignored. * @param buffer The buffer to write to * @param value The value to write */ - def writetUnsignedInt(buffer: ByteBuffer, value: Long): Unit = + def writetUnsignedInt(buffer: ByteBuffer, value: Long): Unit = buffer.putInt((value & 0xffffffffL).asInstanceOf[Int]) - + /** * Write the given long value as a 4 byte unsigned integer. Overflow is ignored. 
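 * (For example, writing the value 0xFFFFFFFFL stores the four bytes FF FF FF FF, and readUnsignedInt
 * at the same index reads back 4294967295; any bits above the low 32 bits of the value are silently dropped.)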
* @param buffer The buffer to write to * @param index The position in the buffer at which to begin writing * @param value The value to write */ - def writeUnsignedInt(buffer: ByteBuffer, index: Int, value: Long): Unit = + def writeUnsignedInt(buffer: ByteBuffer, index: Int, value: Long): Unit = buffer.putInt(index, (value & 0xffffffffL).asInstanceOf[Int]) - + /** * Compute the CRC32 of the byte array * @param bytes The array to compute the checksum for * @return The CRC32 */ def crc32(bytes: Array[Byte]): Long = crc32(bytes, 0, bytes.length) - + /** * Compute the CRC32 of the segment of the byte array given by the specificed size and offset * @param bytes The bytes to checksum @@ -334,7 +339,7 @@ object Utils extends Logging { crc.update(bytes, offset, size) crc.getValue() } - + /** * Compute the hash code for the given items */ @@ -351,7 +356,7 @@ object Utils extends Logging { } return h } - + /** * Group the given values by keys extracted with the given function */ @@ -363,12 +368,12 @@ object Utils extends Logging { case Some(l: List[V]) => m.put(k, v :: l) case None => m.put(k, List(v)) } - } + } m } - + /** - * Read some bytes into the provided buffer, and return the number of bytes read. If the + * Read some bytes into the provided buffer, and return the number of bytes read. If the * channel has been closed or we get -1 on the read for any reason, throw an EOFException */ def read(channel: ReadableByteChannel, buffer: ByteBuffer): Int = { @@ -376,8 +381,8 @@ object Utils extends Logging { case -1 => throw new EOFException("Received -1 when reading from channel, socket has likely been closed.") case n: Int => n } - } - + } + /** * Throw an exception if the given value is null, else return it. You can use this like: * val myValue = Utils.notNull(expressionThatShouldntBeNull) @@ -389,36 +394,33 @@ object Utils extends Logging { v } - /** - * Parse a host and port out of a string - */ - def parseHostPort(hostport: String) : (String, Int) = { - val splits = hostport.split(":") - (splits(0), splits(1).toInt) - } - /** * Get the stack trace from an exception as a string */ def stackTrace(e: Throwable): String = { - val sw = new StringWriter; - val pw = new PrintWriter(sw); - e.printStackTrace(pw); - sw.toString(); + val sw = new StringWriter + val pw = new PrintWriter(sw) + e.printStackTrace(pw) + sw.toString() } /** * This method gets comma separated values which contains key,value pairs and returns a map of * key value pairs. the format of allCSVal is key1:val1, key2:val2 .... + * Also supports strings with multiple ":" such as IpV6 addresses, taking the last occurrence + * of the ":" in the pair as the split, eg a:b:c:val1, d:e:f:val2 => a:b:c -> val1, d:e:f -> val2 */ def parseCsvMap(str: String): Map[String, String] = { val map = new mutable.HashMap[String, String] - if("".equals(str)) - return map - val keyVals = str.split("\\s*,\\s*").map(s => s.split("\\s*:\\s*")) - keyVals.map(pair => (pair(0), pair(1))).toMap + if ("".equals(str)) + return map + val keyVals = str.split("\\s*,\\s*").map(s => { + val lio = s.lastIndexOf(":") + Pair(s.substring(0,lio).trim, s.substring(lio + 1).trim) + }) + keyVals.toMap } - + /** * Parse a comma separated string into a sequence of strings. * Whitespace surrounding the comma will be removed. @@ -470,12 +472,12 @@ object Utils extends Logging { stream.close() } } - + /** * Get the absolute value of the given number. If the number is Int.MinValue return 0. 
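The reworked parseCsvMap a few lines up now splits each key/value pair on the last ':' so that keys which themselves contain colons (for example IPv6 addresses) are preserved. A quick sketch of the behaviour described in its comment (illustrative, not part of the patch):

    import kafka.utils.Utils

    val parsed = Utils.parseCsvMap("a:b:c:val1, d:e:f:val2")
    // parsed == Map("a:b:c" -> "val1", "d:e:f" -> "val2")

    val simple = Utils.parseCsvMap("host:9092")
    // simple == Map("host" -> "9092")
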
* This is different from java.lang.Math.abs or scala.math.abs in that they return Int.MinValue (!). */ - def abs(n: Int) = n & 0x7fffffff + def abs(n: Int) = if(n == Integer.MIN_VALUE) 0 else math.abs(n) /** * Replace the given string suffix with the new suffix. If the string doesn't end with the given suffix throw an exception. @@ -489,7 +491,7 @@ object Utils extends Logging { /** * Create a file with the given path * @param path The path to create - * @throw KafkaStorageException If the file create fails + * @throws KafkaStorageException If the file create fails * @return The created file */ def createFile(path: String): File = { @@ -499,7 +501,7 @@ object Utils extends Logging { throw new KafkaStorageException("Failed to create file %s.".format(path)) f } - + /** * Turn a properties map into a string */ @@ -508,7 +510,33 @@ object Utils extends Logging { props.store(writer, "") writer.toString } - + + /** + * Turn {@linkplain java.util.Properties} with default values into a {@linkplain java.util.Map}. Following example + * illustrates difference from the cast + *
    +   * val defaults = new Properties()
    +   * defaults.put("foo", "bar")
    +   * val props = new Properties(defaults)
    +   *
    +   * props.getProperty("foo") // "bar"
    +   * props.get("foo") // null
    +   * evaluateDefaults(props).get("foo") // "bar"
    +   * 
    + * + * @param props properties to evaluate + * @return new java.util.Map instance + */ + def evaluateDefaults(props: Properties): java.util.Map[String, String] = { + import java.util._ + import JavaConversions.asScalaSet + val evaluated = new HashMap[String, String]() + for (name <- props.stringPropertyNames()) { + evaluated.put(name, props.getProperty(name)) + } + evaluated + } + /** * Read some properties with the given default values */ @@ -518,7 +546,7 @@ object Utils extends Logging { props.load(reader) props } - + /** * Read a big-endian integer from a byte array */ @@ -528,17 +556,55 @@ object Utils extends Logging { ((bytes(offset + 2) & 0xFF) << 8) | (bytes(offset + 3) & 0xFF) } - + /** * Execute the given function inside the lock */ def inLock[T](lock: Lock)(fun: => T): T = { lock.lock() try { - return fun + fun } finally { lock.unlock() } } - + + def inReadLock[T](lock: ReadWriteLock)(fun: => T): T = inLock[T](lock.readLock)(fun) + + def inWriteLock[T](lock: ReadWriteLock)(fun: => T): T = inLock[T](lock.writeLock)(fun) + + + //JSON strings need to be escaped based on ECMA-404 standard http://json.org + def JSONEscapeString (s : String) : String = { + s.map { + case '"' => "\\\"" + case '\\' => "\\\\" + case '/' => "\\/" + case '\b' => "\\b" + case '\f' => "\\f" + case '\n' => "\\n" + case '\r' => "\\r" + case '\t' => "\\t" + /* We'll unicode escape any control characters. These include: + * 0x0 -> 0x1f : ASCII Control (C0 Control Codes) + * 0x7f : ASCII DELETE + * 0x80 -> 0x9f : C1 Control Codes + * + * Per RFC4627, section 2.5, we're not technically required to + * encode the C1 codes, but we do to be safe. + */ + case c if ((c >= '\u0000' && c <= '\u001f') || (c >= '\u007f' && c <= '\u009f')) => "\\u%04x".format(c: Int) + case c => c + }.mkString + } + + /** + * Returns a list of duplicated items + */ + def duplicates[T](s: Traversable[T]): Iterable[T] = { + s.groupBy(identity) + .map{ case (k,l) => (k,l.size)} + .filter{ case (k,l) => (l > 1) } + .keys + } } diff --git a/core/src/main/scala/kafka/utils/VerifiableProperties.scala b/core/src/main/scala/kafka/utils/VerifiableProperties.scala index b070bb4df117d..2ffc7f452dc7a 100644 --- a/core/src/main/scala/kafka/utils/VerifiableProperties.scala +++ b/core/src/main/scala/kafka/utils/VerifiableProperties.scala @@ -20,6 +20,8 @@ package kafka.utils import java.util.Properties import java.util.Collections import scala.collection._ +import kafka.message.{CompressionCodec, NoCompressionCodec} + class VerifiableProperties(val props: Properties) extends Logging { private val referenceSet = mutable.HashSet[String]() @@ -33,7 +35,7 @@ class VerifiableProperties(val props: Properties) extends Logging { def getProperty(name: String): String = { val value = props.getProperty(name) referenceSet.add(name) - return value + if(value == null) value else value.trim() } /** @@ -124,14 +126,14 @@ class VerifiableProperties(val props: Properties) extends Logging { * Get a required argument as a double * @param name The property name * @return the value - * @throw IllegalArgumentException If the given property is not present + * @throws IllegalArgumentException If the given property is not present */ def getDouble(name: String): Double = getString(name).toDouble /** * Get an optional argument as a double * @param name The property name - * @default The default value for the property if not present + * @param default The default value for the property if not present */ def getDouble(name: String, default: Double): Double = { 
if(containsKey(name)) @@ -193,6 +195,24 @@ class VerifiableProperties(val props: Properties) extends Logging { } } + /** + * Parse compression codec from a property list in either. Codecs may be specified as integers, or as strings. + * See [[kafka.message.CompressionCodec]] for more details. + * @param name The property name + * @param default Default compression codec + * @return compression codec + */ + def getCompressionCodec(name: String, default: CompressionCodec) = { + val prop = getString(name, NoCompressionCodec.name) + try { + CompressionCodec.getCompressionCodec(prop.toInt) + } + catch { + case nfe: NumberFormatException => + CompressionCodec.getCompressionCodec(prop) + } + } + def verify() { info("Verifying properties") val propNames = { diff --git a/core/src/main/scala/kafka/utils/ZkUtils.scala b/core/src/main/scala/kafka/utils/ZkUtils.scala index fa86bb94475de..56e3e88e0cc6d 100644 --- a/core/src/main/scala/kafka/utils/ZkUtils.scala +++ b/core/src/main/scala/kafka/utils/ZkUtils.scala @@ -18,24 +18,21 @@ package kafka.utils import kafka.cluster.{Broker, Cluster} -import kafka.consumer.TopicCount -import org.I0Itec.zkclient.{IZkDataListener, ZkClient} -import org.I0Itec.zkclient.exception.{ZkNodeExistsException, ZkNoNodeException, ZkMarshallingError} +import kafka.consumer.{ConsumerThreadId, TopicCount} +import org.I0Itec.zkclient.ZkClient +import org.I0Itec.zkclient.exception.{ZkNodeExistsException, ZkNoNodeException, + ZkMarshallingError, ZkBadVersionException} import org.I0Itec.zkclient.serialize.ZkSerializer import collection._ import kafka.api.LeaderAndIsr -import mutable.ListBuffer import org.apache.zookeeper.data.Stat -import java.util.concurrent.locks.{ReentrantLock, Condition} import kafka.admin._ import kafka.common.{KafkaException, NoEpochForPartitionException} import kafka.controller.ReassignedPartitionsContext -import kafka.controller.PartitionAndReplica import kafka.controller.KafkaController -import scala.{collection, Some} +import scala.Some import kafka.controller.LeaderIsrAndControllerEpoch import kafka.common.TopicAndPartition -import kafka.utils.Utils.inLock import scala.collection object ZkUtils extends Logging { @@ -58,7 +55,7 @@ object ZkUtils extends Logging { getTopicPath(topic) + "/partitions" } - def getTopicConfigPath(topic: String): String = + def getTopicConfigPath(topic: String): String = TopicConfigPath + "/" + topic def getDeleteTopicPath(topic: String): String = @@ -85,43 +82,15 @@ object ZkUtils extends Logging { brokerIds.map(_.toInt).map(getBrokerInfo(zkClient, _)).filter(_.isDefined).map(_.get) } - def getLeaderIsrAndEpochForPartition(zkClient: ZkClient, topic: String, partition: Int):Option[LeaderIsrAndControllerEpoch] = { - val leaderAndIsrPath = getTopicPartitionLeaderAndIsrPath(topic, partition) - val leaderAndIsrInfo = readDataMaybeNull(zkClient, leaderAndIsrPath) - val leaderAndIsrOpt = leaderAndIsrInfo._1 - val stat = leaderAndIsrInfo._2 - leaderAndIsrOpt match { - case Some(leaderAndIsrStr) => parseLeaderAndIsr(leaderAndIsrStr, topic, partition, stat) - case None => None - } - } - def getLeaderAndIsrForPartition(zkClient: ZkClient, topic: String, partition: Int):Option[LeaderAndIsr] = { - getLeaderIsrAndEpochForPartition(zkClient, topic, partition).map(_.leaderAndIsr) + ReplicationUtils.getLeaderIsrAndEpochForPartition(zkClient, topic, partition).map(_.leaderAndIsr) } - + def setupCommonPaths(zkClient: ZkClient) { - for(path <- Seq(ConsumersPath, BrokerIdsPath, BrokerTopicsPath, TopicConfigChangesPath, TopicConfigPath)) + for(path 
<- Seq(ConsumersPath, BrokerIdsPath, BrokerTopicsPath, TopicConfigChangesPath, TopicConfigPath, DeleteTopicsPath)) makeSurePersistentPathExists(zkClient, path) } - def parseLeaderAndIsr(leaderAndIsrStr: String, topic: String, partition: Int, stat: Stat) - : Option[LeaderIsrAndControllerEpoch] = { - Json.parseFull(leaderAndIsrStr) match { - case Some(m) => - val leaderIsrAndEpochInfo = m.asInstanceOf[Map[String, Any]] - val leader = leaderIsrAndEpochInfo.get("leader").get.asInstanceOf[Int] - val epoch = leaderIsrAndEpochInfo.get("leader_epoch").get.asInstanceOf[Int] - val isr = leaderIsrAndEpochInfo.get("isr").get.asInstanceOf[List[Int]] - val controllerEpoch = leaderIsrAndEpochInfo.get("controller_epoch").get.asInstanceOf[Int] - val zkPathVersion = stat.getVersion - debug("Leader %d, Epoch %d, Isr %s, Zk path version %d for partition [%s,%d]".format(leader, epoch, - isr.toString(), zkPathVersion, topic, partition)) - Some(LeaderIsrAndControllerEpoch(LeaderAndIsr(leader, epoch, isr, zkPathVersion), controllerEpoch)) - case None => None - } - } - def getLeaderForPartition(zkClient: ZkClient, topic: String, partition: Int): Option[Int] = { val leaderAndIsrOpt = readDataMaybeNull(zkClient, getTopicPartitionLeaderAndIsrPath(topic, partition))._1 leaderAndIsrOpt match { @@ -210,6 +179,12 @@ object ZkUtils extends Logging { info("Registered broker %d at path %s with address %s:%d.".format(id, brokerIdPath, host, port)) } + def deregisterBrokerInZk(zkClient: ZkClient, id: Int) { + val brokerIdPath = ZkUtils.BrokerIdsPath + "/" + id + deletePath(zkClient, brokerIdPath) + info("Deregistered broker %d at path %s.".format(id, brokerIdPath)) + } + def getConsumerPartitionOwnerPath(group: String, topic: String, partition: Int): String = { val topicDirs = new ZKGroupTopicDirs(group, topic) topicDirs.consumerOwnerDir + "/" + partition @@ -373,17 +348,30 @@ object ZkUtils extends Logging { /** * Conditional update the persistent path data, return (true, newVersion) if it succeeds, otherwise (the path doesn't * exist, the current version is not the expected version, etc.) return (false, -1) + * + * When there is a ConnectionLossException during the conditional update, zkClient will retry the update and may fail + * since the previous update may have succeeded (but the stored zkVersion no longer matches the expected one). + * In this case, we will run the optionalChecker to further check if the previous write did indeed succeeded. 
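The comment above explains why conditionalUpdatePersistentPath now takes an optionalChecker: after a ConnectionLossException the client retries the write, and the retry can fail with ZkBadVersionException even though the first attempt actually succeeded. A hedged sketch of what such a checker could look like, using the (ZkClient, String, String) => (Boolean, Int) shape from the new signature and the readDataMaybeNull helper shown earlier; this is an assumption for illustration, not the checker the patch itself wires in:

    import org.I0Itec.zkclient.ZkClient
    import kafka.utils.ZkUtils

    // Treat the update as successful if the data now stored at the path is exactly
    // what we tried to write, i.e. our own earlier attempt already went through.
    def sameDataChecker(client: ZkClient, path: String, expectedData: String): (Boolean, Int) = {
      val (dataOpt, stat) = ZkUtils.readDataMaybeNull(client, path)
      dataOpt match {
        case Some(current) if current == expectedData => (true, stat.getVersion)
        case _ => (false, -1)
      }
    }

    // Passed as: conditionalUpdatePersistentPath(zkClient, path, newData, expectedVersion, Some(sameDataChecker))
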
*/ - def conditionalUpdatePersistentPath(client: ZkClient, path: String, data: String, expectVersion: Int): (Boolean, Int) = { + def conditionalUpdatePersistentPath(client: ZkClient, path: String, data: String, expectVersion: Int, + optionalChecker:Option[(ZkClient, String, String) => (Boolean,Int)] = None): (Boolean, Int) = { try { val stat = client.writeDataReturnStat(path, data, expectVersion) debug("Conditional update of path %s with value %s and expected version %d succeeded, returning the new version: %d" .format(path, data, expectVersion, stat.getVersion)) (true, stat.getVersion) } catch { - case e: Exception => - error("Conditional update of path %s with data %s and expected version %d failed due to %s".format(path, data, - expectVersion, e.getMessage)) + case e1: ZkBadVersionException => + optionalChecker match { + case Some(checker) => return checker(client, path, data) + case _ => debug("Checker method is not passed skipping zkData match") + } + warn("Conditional update of path %s with data %s and expected version %d failed due to %s".format(path, data, + expectVersion, e1.getMessage)) + (false, -1) + case e2: Exception => + warn("Conditional update of path %s with data %s and expected version %d failed due to %s".format(path, data, + expectVersion, e2.getMessage)) (false, -1) } } @@ -422,7 +410,7 @@ object ZkUtils extends Logging { case e2: Throwable => throw e2 } } - + def deletePath(client: ZkClient, path: String): Boolean = { try { client.delete(path) @@ -445,7 +433,7 @@ object ZkUtils extends Logging { case e2: Throwable => throw e2 } } - + def maybeDeletePath(zkUrl: String, dir: String) { try { val zk = new ZkClient(zkUrl, 30*1000, 30*1000, ZKStringSerializer) @@ -512,7 +500,7 @@ object ZkUtils extends Logging { : mutable.Map[TopicAndPartition, LeaderIsrAndControllerEpoch] = { val ret = new mutable.HashMap[TopicAndPartition, LeaderIsrAndControllerEpoch] for(topicAndPartition <- topicAndPartitions) { - ZkUtils.getLeaderIsrAndEpochForPartition(zkClient, topicAndPartition.topic, topicAndPartition.partition) match { + ReplicationUtils.getLeaderIsrAndEpochForPartition(zkClient, topicAndPartition.topic, topicAndPartition.partition) match { case Some(leaderIsrAndControllerEpoch) => ret.put(topicAndPartition, leaderIsrAndControllerEpoch) case None => } @@ -587,23 +575,28 @@ object ZkUtils extends Logging { } } - def parsePartitionReassignmentData(jsonData: String): Map[TopicAndPartition, Seq[Int]] = { - val reassignedPartitions: mutable.Map[TopicAndPartition, Seq[Int]] = mutable.Map() + // Parses without deduplicating keys so the the data can be checked before allowing reassignment to proceed + def parsePartitionReassignmentDataWithoutDedup(jsonData: String): Seq[(TopicAndPartition, Seq[Int])] = { Json.parseFull(jsonData) match { case Some(m) => m.asInstanceOf[Map[String, Any]].get("partitions") match { case Some(partitionsSeq) => - partitionsSeq.asInstanceOf[Seq[Map[String, Any]]].foreach(p => { + partitionsSeq.asInstanceOf[Seq[Map[String, Any]]].map(p => { val topic = p.get("topic").get.asInstanceOf[String] val partition = p.get("partition").get.asInstanceOf[Int] val newReplicas = p.get("replicas").get.asInstanceOf[Seq[Int]] - reassignedPartitions += TopicAndPartition(topic, partition) -> newReplicas + TopicAndPartition(topic, partition) -> newReplicas }) case None => + Seq.empty } case None => + Seq.empty } - reassignedPartitions + } + + def parsePartitionReassignmentData(jsonData: String): Map[TopicAndPartition, Seq[Int]] = { + 
parsePartitionReassignmentDataWithoutDedup(jsonData).toMap } def parseTopicsData(jsonData: String): Seq[String] = { @@ -670,12 +663,12 @@ object ZkUtils extends Logging { getChildren(zkClient, dirs.consumerRegistryDir) } - def getConsumersPerTopic(zkClient: ZkClient, group: String) : mutable.Map[String, List[String]] = { + def getConsumersPerTopic(zkClient: ZkClient, group: String, excludeInternalTopics: Boolean) : mutable.Map[String, List[ConsumerThreadId]] = { val dirs = new ZKGroupDirs(group) val consumers = getChildrenParentMayNotExist(zkClient, dirs.consumerRegistryDir) - val consumersPerTopicMap = new mutable.HashMap[String, List[String]] + val consumersPerTopicMap = new mutable.HashMap[String, List[ConsumerThreadId]] for (consumer <- consumers) { - val topicCount = TopicCount.constructTopicCount(group, consumer, zkClient) + val topicCount = TopicCount.constructTopicCount(group, consumer, zkClient, excludeInternalTopics) for ((topic, consumerThreadIdSet) <- topicCount.getConsumerThreadIdsPerTopic) { for (consumerThreadId <- consumerThreadIdSet) consumersPerTopicMap.get(topic) match { @@ -722,41 +715,6 @@ object ZkUtils extends Logging { } } -class LeaderExistsOrChangedListener(topic: String, - partition: Int, - leaderLock: ReentrantLock, - leaderExistsOrChanged: Condition, - oldLeaderOpt: Option[Int] = None, - zkClient: ZkClient = null) extends IZkDataListener with Logging { - @throws(classOf[Exception]) - def handleDataChange(dataPath: String, data: Object) { - val t = dataPath.split("/").takeRight(3).head - val p = dataPath.split("/").takeRight(2).head.toInt - inLock(leaderLock) { - if(t == topic && p == partition){ - if(oldLeaderOpt == None){ - trace("In leader existence listener on partition [%s, %d], leader has been created".format(topic, partition)) - leaderExistsOrChanged.signal() - } - else { - val newLeaderOpt = ZkUtils.getLeaderForPartition(zkClient, t, p) - if(newLeaderOpt.isDefined && newLeaderOpt.get != oldLeaderOpt.get){ - trace("In leader change listener on partition [%s, %d], leader has been moved from %d to %d".format(topic, partition, oldLeaderOpt.get, newLeaderOpt.get)) - leaderExistsOrChanged.signal() - } - } - } - } - } - - @throws(classOf[Exception]) - def handleDataDeleted(dataPath: String) { - inLock(leaderLock) { - leaderExistsOrChanged.signal() - } - } -} - object ZKStringSerializer extends ZkSerializer { @throws(classOf[ZkMarshallingError]) diff --git a/core/src/test/resources/log4j.properties b/core/src/test/resources/log4j.properties index d7d03ea1b1329..1b7d5d8f7d5fa 100644 --- a/core/src/test/resources/log4j.properties +++ b/core/src/test/resources/log4j.properties @@ -19,6 +19,7 @@ log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n log4j.logger.kafka=ERROR +log4j.logger.org.apache.kafka=ERROR # zkclient can be verbose, during debugging it is common to adjust is separately log4j.logger.org.I0Itec.zkclient.ZkClient=WARN diff --git a/core/src/test/scala/integration/kafka/api/ProducerCompressionTest.scala b/core/src/test/scala/integration/kafka/api/ProducerCompressionTest.scala new file mode 100644 index 0000000000000..1505fd4464dc9 --- /dev/null +++ b/core/src/test/scala/integration/kafka/api/ProducerCompressionTest.scala @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.api.test + +import java.util.{Properties, Collection, ArrayList} + +import org.scalatest.junit.JUnit3Suite +import org.junit.runners.Parameterized +import org.junit.runner.RunWith +import org.junit.runners.Parameterized.Parameters +import org.junit.{After, Before, Test} +import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer, ProducerConfig} +import org.junit.Assert._ + +import kafka.api.FetchRequestBuilder +import kafka.server.{KafkaConfig, KafkaServer} +import kafka.consumer.SimpleConsumer +import kafka.message.Message +import kafka.zk.ZooKeeperTestHarness +import kafka.utils.{Utils, TestUtils} + +import scala.Array + + +@RunWith(value = classOf[Parameterized]) +class ProducerCompressionTest(compression: String) extends JUnit3Suite with ZooKeeperTestHarness { + private val brokerId = 0 + private val port = TestUtils.choosePort + private var server: KafkaServer = null + + private val props = TestUtils.createBrokerConfig(brokerId, port) + private val config = new KafkaConfig(props) + + private val topic = "topic" + private val numRecords = 2000 + + @Before + override def setUp() { + super.setUp() + server = TestUtils.createServer(config) + } + + @After + override def tearDown() { + server.shutdown + Utils.rm(server.config.logDirs) + super.tearDown() + } + + /** + * testCompression + * + * Compressed messages should be able to sent and consumed correctly + */ + @Test + def testCompression() { + + val props = new Properties() + props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, TestUtils.getBrokerListStrFromConfigs(Seq(config))) + props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, compression) + props.put(ProducerConfig.BATCH_SIZE_CONFIG, "66000") + props.put(ProducerConfig.LINGER_MS_CONFIG, "200") + var producer = new KafkaProducer[Array[Byte],Array[Byte]](props) + val consumer = new SimpleConsumer("localhost", port, 100, 1024*1024, "") + + try { + // create topic + TestUtils.createTopic(zkClient, topic, 1, 1, List(server)) + val partition = 0 + + // prepare the messages + val messages = for (i <-0 until numRecords) + yield ("value" + i).getBytes + + // make sure the returned messages are correct + val responses = for (message <- messages) + yield producer.send(new ProducerRecord[Array[Byte],Array[Byte]](topic, null, null, message)) + val futures = responses.toList + for ((future, offset) <- futures zip (0 until numRecords)) { + assertEquals(offset.toLong, future.get.offset) + } + + // make sure the fetched message count match + val fetchResponse = consumer.fetch(new FetchRequestBuilder().addFetch(topic, partition, 0, Int.MaxValue).build()) + val messageSet = fetchResponse.messageSet(topic, partition).iterator.toBuffer + assertEquals("Should have fetched " + numRecords + " messages", numRecords, messageSet.size) + + var index = 0 + for (message <- messages) { + assertEquals(new Message(bytes = message), messageSet(index).message) + assertEquals(index.toLong, 
messageSet(index).offset) + index += 1 + } + } finally { + if (producer != null) { + producer.close() + producer = null + } + if (consumer != null) + consumer.close() + } + } +} + +object ProducerCompressionTest { + + // NOTE: Must return collection of Array[AnyRef] (NOT Array[Any]). + @Parameters + def parameters: Collection[Array[String]] = { + val list = new ArrayList[Array[String]]() + list.add(Array("none")) + list.add(Array("gzip")) + list.add(Array("snappy")) + list.add(Array("lz4")) + list + } +} diff --git a/core/src/test/scala/integration/kafka/api/ProducerFailureHandlingTest.scala b/core/src/test/scala/integration/kafka/api/ProducerFailureHandlingTest.scala new file mode 100644 index 0000000000000..5ec613cdb50b9 --- /dev/null +++ b/core/src/test/scala/integration/kafka/api/ProducerFailureHandlingTest.scala @@ -0,0 +1,384 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.api.test + +import org.junit.Test +import org.junit.Assert._ + +import java.lang.Integer +import java.util.{Properties, Random} +import java.util.concurrent.{TimeoutException, TimeUnit, ExecutionException} + +import kafka.api.FetchRequestBuilder +import kafka.common.Topic +import kafka.consumer.SimpleConsumer +import kafka.server.KafkaConfig +import kafka.integration.KafkaServerTestHarness +import kafka.utils.{TestZKUtils, ShutdownableThread, TestUtils} + +import org.apache.kafka.common.KafkaException +import org.apache.kafka.common.errors.{InvalidTopicException, NotEnoughReplicasException} +import org.apache.kafka.clients.producer._ + +class ProducerFailureHandlingTest extends KafkaServerTestHarness { + private val producerBufferSize = 30000 + private val serverMessageMaxBytes = producerBufferSize/2 + + val numServers = 2 + val configs = + for(props <- TestUtils.createBrokerConfigs(numServers, false)) + yield new KafkaConfig(props) { + override val zkConnect = TestZKUtils.zookeeperConnect + override val autoCreateTopicsEnable = false + override val messageMaxBytes = serverMessageMaxBytes + } + + + private var consumer1: SimpleConsumer = null + private var consumer2: SimpleConsumer = null + + private var producer1: KafkaProducer[Array[Byte],Array[Byte]] = null + private var producer2: KafkaProducer[Array[Byte],Array[Byte]] = null + private var producer3: KafkaProducer[Array[Byte],Array[Byte]] = null + private var producer4: KafkaProducer[Array[Byte],Array[Byte]] = null + + private val topic1 = "topic-1" + private val topic2 = "topic-2" + + override def setUp() { + super.setUp() + + // TODO: we need to migrate to new consumers when 0.9 is final + consumer1 = new SimpleConsumer("localhost", configs(0).port, 100, 1024*1024, "") + consumer2 = new SimpleConsumer("localhost", configs(1).port, 100, 1024*1024, "") + + producer1 = 
TestUtils.createNewProducer(brokerList, acks = 0, blockOnBufferFull = false, bufferSize = producerBufferSize) + producer2 = TestUtils.createNewProducer(brokerList, acks = 1, blockOnBufferFull = false, bufferSize = producerBufferSize) + producer3 = TestUtils.createNewProducer(brokerList, acks = -1, blockOnBufferFull = false, bufferSize = producerBufferSize) + } + + override def tearDown() { + consumer1.close + consumer2.close + + if (producer1 != null) producer1.close + if (producer2 != null) producer2.close + if (producer3 != null) producer3.close + if (producer4 != null) producer4.close + + super.tearDown() + } + + /** + * With ack == 0 the future metadata will have no exceptions with offset -1 + */ + @Test + def testTooLargeRecordWithAckZero() { + // create topic + TestUtils.createTopic(zkClient, topic1, 1, 2, servers) + + // send a too-large record + val record = new ProducerRecord[Array[Byte],Array[Byte]](topic1, null, "key".getBytes, new Array[Byte](serverMessageMaxBytes + 1)) + assertEquals("Returned metadata should have offset -1", producer1.send(record).get.offset, -1L) + } + + /** + * With ack == 1 the future metadata will throw ExecutionException caused by RecordTooLargeException + */ + @Test + def testTooLargeRecordWithAckOne() { + // create topic + TestUtils.createTopic(zkClient, topic1, 1, 2, servers) + + // send a too-large record + val record = new ProducerRecord[Array[Byte],Array[Byte]](topic1, null, "key".getBytes, new Array[Byte](serverMessageMaxBytes + 1)) + intercept[ExecutionException] { + producer2.send(record).get + } + } + + /** + * With non-exist-topic the future metadata should return ExecutionException caused by TimeoutException + */ + @Test + def testNonExistentTopic() { + // send a record with non-exist topic + val record = new ProducerRecord[Array[Byte],Array[Byte]](topic2, null, "key".getBytes, "value".getBytes) + intercept[ExecutionException] { + producer1.send(record).get + } + } + + /** + * With incorrect broker-list the future metadata should return ExecutionException caused by TimeoutException + * + * TODO: other exceptions that can be thrown in ExecutionException: + * UnknownTopicOrPartitionException + * NotLeaderForPartitionException + * LeaderNotAvailableException + * CorruptRecordException + * TimeoutException + */ + @Test + def testWrongBrokerList() { + // create topic + TestUtils.createTopic(zkClient, topic1, 1, 2, servers) + + // producer with incorrect broker list + producer4 = TestUtils.createNewProducer("localhost:8686,localhost:4242", acks = 1, blockOnBufferFull = false, bufferSize = producerBufferSize) + + // send a record with incorrect broker list + val record = new ProducerRecord[Array[Byte],Array[Byte]](topic1, null, "key".getBytes, "value".getBytes) + intercept[ExecutionException] { + producer4.send(record).get + } + } + + /** + * 1. With ack=0, the future metadata should not be blocked. + * 2. 
With ack=1, the future metadata should block, + * and subsequent calls will eventually cause buffer full + */ + @Test + def testNoResponse() { + // create topic + TestUtils.createTopic(zkClient, topic1, 1, 2, servers) + + // first send a message to make sure the metadata is refreshed + val record1 = new ProducerRecord[Array[Byte],Array[Byte]](topic1, null, "key".getBytes, "value".getBytes) + producer1.send(record1).get + producer2.send(record1).get + + // stop IO threads and request handling, but leave networking operational + // any requests should be accepted and queue up, but not handled + servers.foreach(server => server.requestHandlerPool.shutdown()) + + producer1.send(record1).get(5000, TimeUnit.MILLISECONDS) + + intercept[TimeoutException] { + producer2.send(record1).get(5000, TimeUnit.MILLISECONDS) + } + + // TODO: expose producer configs after creating them + // send enough messages to get buffer full + val tooManyRecords = 10 + val msgSize = producerBufferSize / tooManyRecords + val value = new Array[Byte](msgSize) + new Random().nextBytes(value) + val record2 = new ProducerRecord[Array[Byte],Array[Byte]](topic1, null, "key".getBytes, value) + + intercept[KafkaException] { + for (i <- 1 to tooManyRecords) + producer2.send(record2) + } + + // do not close produce2 since it will block + // TODO: can we do better? + producer2 = null + } + + /** + * The send call with invalid partition id should throw KafkaException caused by IllegalArgumentException + */ + @Test + def testInvalidPartition() { + // create topic + TestUtils.createTopic(zkClient, topic1, 1, 2, servers) + + // create a record with incorrect partition id, send should fail + val record = new ProducerRecord[Array[Byte],Array[Byte]](topic1, new Integer(1), "key".getBytes, "value".getBytes) + intercept[IllegalArgumentException] { + producer1.send(record) + } + intercept[IllegalArgumentException] { + producer2.send(record) + } + intercept[IllegalArgumentException] { + producer3.send(record) + } + } + + /** + * The send call after producer closed should throw KafkaException cased by IllegalStateException + */ + @Test + def testSendAfterClosed() { + // create topic + TestUtils.createTopic(zkClient, topic1, 1, 2, servers) + + val record = new ProducerRecord[Array[Byte],Array[Byte]](topic1, null, "key".getBytes, "value".getBytes) + + // first send a message to make sure the metadata is refreshed + producer1.send(record).get + producer2.send(record).get + producer3.send(record).get + + intercept[IllegalStateException] { + producer1.close + producer1.send(record) + } + intercept[IllegalStateException] { + producer2.close + producer2.send(record) + } + intercept[IllegalStateException] { + producer3.close + producer3.send(record) + } + + // re-close producer is fine + } + + /** + * With replication, producer should able able to find new leader after it detects broker failure + */ + @Test + def testBrokerFailure() { + // create topic + val leaders = TestUtils.createTopic(zkClient, topic1, 1, 2, servers) + val partition = 0 + assertTrue("Leader of partition 0 of the topic should exist", leaders(partition).isDefined) + + val scheduler = new ProducerScheduler() + scheduler.start + + // rolling bounce brokers + for (i <- 0 until 2) { + for (server <- servers) { + server.shutdown() + server.awaitShutdown() + server.startup + + Thread.sleep(2000) + } + + // Make sure the producer do not see any exception + // in returned metadata due to broker failures + assertTrue(scheduler.failed == false) + + // Make sure the leader still exists after 
bouncing brokers + TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic1, partition) + } + + scheduler.shutdown + + // Make sure the producer do not see any exception + // when draining the left messages on shutdown + assertTrue(scheduler.failed == false) + + // double check that the leader info has been propagated after consecutive bounces + val leader = TestUtils.waitUntilMetadataIsPropagated(servers, topic1, partition) + + val fetchResponse = if(leader == configs(0).brokerId) { + consumer1.fetch(new FetchRequestBuilder().addFetch(topic1, partition, 0, Int.MaxValue).build()).messageSet(topic1, partition) + } else { + consumer2.fetch(new FetchRequestBuilder().addFetch(topic1, partition, 0, Int.MaxValue).build()).messageSet(topic1, partition) + } + + val messages = fetchResponse.iterator.toList.map(_.message) + val uniqueMessages = messages.toSet + val uniqueMessageSize = uniqueMessages.size + + assertEquals("Should have fetched " + scheduler.sent + " unique messages", scheduler.sent, uniqueMessageSize) + } + + @Test + def testCannotSendToInternalTopic() { + val thrown = intercept[ExecutionException] { + producer2.send(new ProducerRecord[Array[Byte],Array[Byte]](Topic.InternalTopics.head, "test".getBytes, "test".getBytes)).get + } + assertTrue(thrown.getCause.isInstanceOf[InvalidTopicException]) + } + + @Test + def testNotEnoughReplicas() { + val topicName = "minisrtest" + val topicProps = new Properties() + topicProps.put("min.insync.replicas","3") + + TestUtils.createTopic(zkClient, topicName, 1, 2, servers,topicProps) + + val record = new ProducerRecord[Array[Byte],Array[Byte]](topicName, null, "key".getBytes, "value".getBytes) + try { + producer3.send(record).get + fail("Expected exception when producing to topic with fewer brokers than min.insync.replicas") + } catch { + case e: ExecutionException => + if (!e.getCause.isInstanceOf[NotEnoughReplicasException]) { + fail("Expected NotEnoughReplicasException when producing to topic with fewer brokers than min.insync.replicas") + } + } + } + + @Test + def testNotEnoughReplicasAfterBrokerShutdown() { + val topicName = "minisrtest2" + val topicProps = new Properties() + topicProps.put("min.insync.replicas","2") + + TestUtils.createTopic(zkClient, topicName, 1, 2, servers,topicProps) + + val record = new ProducerRecord[Array[Byte],Array[Byte]](topicName, null, "key".getBytes, "value".getBytes) + // this should work with all brokers up and running + producer3.send(record).get + + // shut down one broker + servers.head.shutdown() + servers.head.awaitShutdown() + try { + producer3.send(record).get + fail("Expected exception when producing to topic with fewer brokers than min.insync.replicas") + } catch { + case e: ExecutionException => + if (!e.getCause.isInstanceOf[NotEnoughReplicasException]) { + fail("Expected NotEnoughReplicasException when producing to topic with fewer brokers than min.insync.replicas") + } + } + + // restart the server + servers.head.startup() + } + + private class ProducerScheduler extends ShutdownableThread("daemon-producer", false) + { + val numRecords = 1000 + var sent = 0 + var failed = false + + val producer = TestUtils.createNewProducer(brokerList, bufferSize = producerBufferSize, retries = 10) + + override def doWork(): Unit = { + val responses = + for (i <- sent+1 to sent+numRecords) + yield producer.send(new ProducerRecord[Array[Byte],Array[Byte]](topic1, null, null, i.toString.getBytes)) + val futures = responses.toList + + try { + futures.map(_.get) + sent += numRecords + } catch { + case e : Exception 
=> failed = true + } + } + + override def shutdown(){ + super.shutdown() + producer.close + } + } +} diff --git a/core/src/test/scala/integration/kafka/api/ProducerSendTest.scala b/core/src/test/scala/integration/kafka/api/ProducerSendTest.scala new file mode 100644 index 0000000000000..6196060edf9f1 --- /dev/null +++ b/core/src/test/scala/integration/kafka/api/ProducerSendTest.scala @@ -0,0 +1,244 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.api.test + +import java.lang.{Integer, IllegalArgumentException} + +import org.apache.kafka.clients.producer._ +import org.scalatest.junit.JUnit3Suite +import org.junit.Test +import org.junit.Assert._ + +import kafka.server.KafkaConfig +import kafka.utils.{TestZKUtils, TestUtils} +import kafka.consumer.SimpleConsumer +import kafka.api.FetchRequestBuilder +import kafka.message.Message +import kafka.integration.KafkaServerTestHarness + + +class ProducerSendTest extends JUnit3Suite with KafkaServerTestHarness { + val numServers = 2 + val configs = + for(props <- TestUtils.createBrokerConfigs(numServers, false)) + yield new KafkaConfig(props) { + override val zkConnect = TestZKUtils.zookeeperConnect + override val numPartitions = 4 + } + + private var consumer1: SimpleConsumer = null + private var consumer2: SimpleConsumer = null + + private val topic = "topic" + private val numRecords = 100 + + override def setUp() { + super.setUp() + + // TODO: we need to migrate to new consumers when 0.9 is final + consumer1 = new SimpleConsumer("localhost", configs(0).port, 100, 1024*1024, "") + consumer2 = new SimpleConsumer("localhost", configs(1).port, 100, 1024*1024, "") + } + + override def tearDown() { + consumer1.close() + consumer2.close() + + super.tearDown() + } + + class CheckErrorCallback extends Callback { + def onCompletion(metadata: RecordMetadata, exception: Exception) { + if (exception != null) + fail("Send callback returns the following exception", exception) + } + } + + /** + * testSendOffset checks the basic send API behavior + * + * 1. Send with null key/value/partition-id should be accepted; send with null topic should be rejected. + * 2. 
Last message of the non-blocking send should return the correct offset metadata + */ + @Test + def testSendOffset() { + var producer = TestUtils.createNewProducer(brokerList) + + val callback = new CheckErrorCallback + + try { + // create topic + TestUtils.createTopic(zkClient, topic, 1, 2, servers) + + // send a normal record + val record0 = new ProducerRecord[Array[Byte],Array[Byte]](topic, new Integer(0), "key".getBytes, "value".getBytes) + assertEquals("Should have offset 0", 0L, producer.send(record0, callback).get.offset) + + // send a record with null value should be ok + val record1 = new ProducerRecord[Array[Byte],Array[Byte]](topic, new Integer(0), "key".getBytes, null) + assertEquals("Should have offset 1", 1L, producer.send(record1, callback).get.offset) + + // send a record with null key should be ok + val record2 = new ProducerRecord[Array[Byte],Array[Byte]](topic, new Integer(0), null, "value".getBytes) + assertEquals("Should have offset 2", 2L, producer.send(record2, callback).get.offset) + + // send a record with null part id should be ok + val record3 = new ProducerRecord[Array[Byte],Array[Byte]](topic, null, "key".getBytes, "value".getBytes) + assertEquals("Should have offset 3", 3L, producer.send(record3, callback).get.offset) + + // send a record with null topic should fail + try { + val record4 = new ProducerRecord[Array[Byte],Array[Byte]](null, new Integer(0), "key".getBytes, "value".getBytes) + producer.send(record4, callback) + fail("Should not allow sending a record without topic") + } catch { + case iae: IllegalArgumentException => // this is ok + case e: Throwable => fail("Only expecting IllegalArgumentException", e) + } + + // non-blocking send a list of records + for (i <- 1 to numRecords) + producer.send(record0) + + // check that all messages have been acked via offset + assertEquals("Should have offset " + (numRecords + 4), numRecords + 4L, producer.send(record0, callback).get.offset) + + } finally { + if (producer != null) { + producer.close() + producer = null + } + } + } + + /** + * testClose checks the closing behavior + * + * After close() returns, all messages should be sent with correct returned offset metadata + */ + @Test + def testClose() { + var producer = TestUtils.createNewProducer(brokerList) + + try { + // create topic + TestUtils.createTopic(zkClient, topic, 1, 2, servers) + + // non-blocking send a list of records + val record0 = new ProducerRecord[Array[Byte],Array[Byte]](topic, null, "key".getBytes, "value".getBytes) + for (i <- 1 to numRecords) + producer.send(record0) + val response0 = producer.send(record0) + + // close the producer + producer.close() + producer = null + + // check that all messages have been acked via offset, + // this also checks that messages with same key go to the same partition + assertTrue("The last message should be acked before producer is shutdown", response0.isDone) + assertEquals("Should have offset " + numRecords, numRecords.toLong, response0.get.offset) + + } finally { + if (producer != null) { + producer.close() + producer = null + } + } + } + + /** + * testSendToPartition checks the partitioning behavior + * + * The specified partition-id should be respected + */ + @Test + def testSendToPartition() { + var producer = TestUtils.createNewProducer(brokerList) + + try { + // create topic + val leaders = TestUtils.createTopic(zkClient, topic, 2, 2, servers) + val partition = 1 + + // make sure leaders exist + val leader1 = leaders(partition) + assertTrue("Leader for topic \"topic\" partition 1 should exist", 
leader1.isDefined) + + val responses = + for (i <- 1 to numRecords) + yield producer.send(new ProducerRecord[Array[Byte],Array[Byte]](topic, partition, null, ("value" + i).getBytes)) + val futures = responses.toList + futures.map(_.get) + for (future <- futures) + assertTrue("Request should have completed", future.isDone) + + // make sure all of them end up in the same partition with increasing offset values + for ((future, offset) <- futures zip (0 until numRecords)) { + assertEquals(offset.toLong, future.get.offset) + assertEquals(topic, future.get.topic) + assertEquals(partition, future.get.partition) + } + + // make sure the fetched messages also respect the partitioning and ordering + val fetchResponse1 = if(leader1.get == configs(0).brokerId) { + consumer1.fetch(new FetchRequestBuilder().addFetch(topic, partition, 0, Int.MaxValue).build()) + } else { + consumer2.fetch(new FetchRequestBuilder().addFetch(topic, partition, 0, Int.MaxValue).build()) + } + val messageSet1 = fetchResponse1.messageSet(topic, partition).iterator.toBuffer + assertEquals("Should have fetched " + numRecords + " messages", numRecords, messageSet1.size) + + // TODO: also check topic and partition after they are added in the return messageSet + for (i <- 0 to numRecords - 1) { + assertEquals(new Message(bytes = ("value" + (i + 1)).getBytes), messageSet1(i).message) + assertEquals(i.toLong, messageSet1(i).offset) + } + } finally { + if (producer != null) { + producer.close() + producer = null + } + } + } + + /** + * testAutoCreateTopic + * + * The topic should be created upon sending the first message + */ + @Test + def testAutoCreateTopic() { + var producer = TestUtils.createNewProducer(brokerList, retries = 5) + + try { + // Send a message to auto-create the topic + val record = new ProducerRecord[Array[Byte],Array[Byte]](topic, null, "key".getBytes, "value".getBytes) + assertEquals("Should have offset 0", 0L, producer.send(record).get.offset) + + // double check that the topic is created with leader elected + TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0) + + } finally { + if (producer != null) { + producer.close() + producer = null + } + } + } +} diff --git a/core/src/test/scala/kafka/log/LogConfigTest.scala b/core/src/test/scala/kafka/log/LogConfigTest.scala new file mode 100644 index 0000000000000..99b0df7b69c5e --- /dev/null +++ b/core/src/test/scala/kafka/log/LogConfigTest.scala @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package kafka.log + +import org.apache.kafka.common.config.ConfigException +import org.scalatest.junit.JUnit3Suite +import org.junit.{Assert, Test} +import java.util.Properties + +class LogConfigTest extends JUnit3Suite { + + @Test + def testFromPropsDefaults() { + val defaults = new Properties() + defaults.put(LogConfig.SegmentBytesProp, "4242") + val props = new Properties(defaults) + + val config = LogConfig.fromProps(props) + + Assert.assertEquals(4242, config.segmentSize) + Assert.assertEquals("LogConfig defaults should be retained", Defaults.MaxMessageSize, config.maxMessageSize) + } + + @Test + def testFromPropsEmpty() { + val p = new Properties() + val config = LogConfig.fromProps(p) + Assert.assertEquals(LogConfig(), config) + } + + @Test + def testFromPropsToProps() { + import scala.util.Random._ + val expected = new Properties() + LogConfig.configNames().foreach((name) => { + name match { + case LogConfig.UncleanLeaderElectionEnableProp => expected.setProperty(name, randFrom("true", "false")) + case LogConfig.CleanupPolicyProp => expected.setProperty(name, randFrom(LogConfig.Compact, LogConfig.Delete)) + case LogConfig.MinCleanableDirtyRatioProp => expected.setProperty(name, "%.1f".format(nextDouble * .9 + .1)) + case LogConfig.MinInSyncReplicasProp => expected.setProperty(name, (nextInt(Int.MaxValue - 1) + 1).toString) + case LogConfig.RetentionBytesProp => expected.setProperty(name, nextInt().toString) + case positiveIntProperty => expected.setProperty(name, nextInt(Int.MaxValue).toString) + } + }) + + val actual = LogConfig.fromProps(expected).toProps + Assert.assertEquals(expected, actual) + } + + @Test + def testFromPropsInvalid() { + LogConfig.configNames().foreach((name) => { + name match { + case LogConfig.UncleanLeaderElectionEnableProp => return + case LogConfig.RetentionBytesProp => assertPropertyInvalid(name, "not_a_number") + case LogConfig.CleanupPolicyProp => assertPropertyInvalid(name, "true", "foobar"); + case LogConfig.MinCleanableDirtyRatioProp => assertPropertyInvalid(name, "not_a_number", "-0.1", "1.2") + case LogConfig.MinInSyncReplicasProp => assertPropertyInvalid(name, "not_a_number", "0", "-1") + case positiveIntProperty => assertPropertyInvalid(name, "not_a_number", "-1") + } + }) + } + + private def assertPropertyInvalid(name: String, values: AnyRef*) { + values.foreach((value) => { + val props = new Properties + props.setProperty(name, value.toString) + intercept[ConfigException] { + LogConfig.fromProps(props) + } + }) + } + + private def randFrom[T](choices: T*): T = { + import scala.util.Random + choices(Random.nextInt(choices.size)) + } +} diff --git a/core/src/test/scala/other/kafka/StressTestLog.scala b/core/src/test/scala/other/kafka/StressTestLog.scala index 8fcd068b24868..e19b8b2838355 100644 --- a/core/src/test/scala/other/kafka/StressTestLog.scala +++ b/core/src/test/scala/other/kafka/StressTestLog.scala @@ -91,7 +91,7 @@ object StressTestLog { @volatile var offset = 0 override def work() { try { - log.read(offset, 1024, Some(offset+1)) match { + log.read(offset, 1024, Some(offset+1)).messageSet match { case read: FileMessageSet if read.sizeInBytes > 0 => { val first = read.head require(first.offset == offset, "We should either read nothing or the message we asked for.") diff --git a/core/src/test/scala/other/kafka/TestEndToEndLatency.scala b/core/src/test/scala/other/kafka/TestEndToEndLatency.scala deleted file mode 100644 index c4aed10f50ca5..0000000000000 --- a/core/src/test/scala/other/kafka/TestEndToEndLatency.scala +++ 
/dev/null @@ -1,71 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package kafka - -import java.util.Properties -import kafka.consumer._ -import kafka.producer._ -import kafka.message._ - -object TestEndToEndLatency { - def main(args: Array[String]) { - if(args.length != 3) { - System.err.println("USAGE: java " + getClass().getName + " broker_list zookeeper_connect num_messages") - System.exit(1) - } - - val brokerList = args(0) - val zkConnect = args(1) - val numMessages = args(2).toInt - val topic = "test" - - val consumerProps = new Properties() - consumerProps.put("group.id", topic) - consumerProps.put("auto.commit", "true") - consumerProps.put("auto.offset.reset", "largest") - consumerProps.put("zookeeper.connect", zkConnect) - consumerProps.put("socket.timeout.ms", 1201000.toString) - - val config = new ConsumerConfig(consumerProps) - val connector = Consumer.create(config) - var stream = connector.createMessageStreams(Map(topic -> 1)).get(topic).head.head - val iter = stream.iterator - - val producerProps = new Properties() - producerProps.put("metadata.broker.list", brokerList) - producerProps.put("producer.type", "sync") - val producer = new Producer[Any, Any](new ProducerConfig(producerProps)) - - val message = new Message("hello there beautiful".getBytes) - var totalTime = 0.0 - for(i <- 0 until numMessages) { - var begin = System.nanoTime - producer.send(new KeyedMessage(topic, message)) - val received = iter.next - val elapsed = System.nanoTime - begin - // poor man's progress bar - if(i % 10000 == 0) - println(i + "\t" + elapsed / 1000.0 / 1000.0) - totalTime += elapsed - } - println("Avg latency: " + (totalTime / numMessages / 1000.0 / 1000.0) + "ms") - producer.close() - connector.shutdown() - System.exit(0) - } -} \ No newline at end of file diff --git a/core/src/test/scala/other/kafka/TestLinearWriteSpeed.scala b/core/src/test/scala/other/kafka/TestLinearWriteSpeed.scala index eeb8c8856200c..7211c2529c1db 100644 --- a/core/src/test/scala/other/kafka/TestLinearWriteSpeed.scala +++ b/core/src/test/scala/other/kafka/TestLinearWriteSpeed.scala @@ -83,13 +83,7 @@ object TestLinearWriteSpeed { val options = parser.parse(args : _*) - for(arg <- List(bytesOpt, sizeOpt, filesOpt)) { - if(!options.has(arg)) { - System.err.println("Missing required argument \"" + arg + "\"") - parser.printHelpOn(System.err) - System.exit(1) - } - } + CommandLineUtils.checkRequiredArgs(parser, options, bytesOpt, sizeOpt, filesOpt) var bytesToWrite = options.valueOf(bytesOpt).longValue val bufferSize = options.valueOf(sizeOpt).intValue diff --git a/core/src/test/scala/other/kafka/TestOffsetManager.scala b/core/src/test/scala/other/kafka/TestOffsetManager.scala new file mode 100644 index 0000000000000..41f334d48897b --- /dev/null +++ 
b/core/src/test/scala/other/kafka/TestOffsetManager.scala @@ -0,0 +1,291 @@ +package other.kafka + +import org.I0Itec.zkclient.ZkClient +import kafka.api._ +import kafka.utils.{ShutdownableThread, ZKStringSerializer} +import scala.collection._ +import kafka.client.ClientUtils +import joptsimple.OptionParser +import kafka.common.{ErrorMapping, OffsetAndMetadata, TopicAndPartition} +import kafka.network.BlockingChannel +import scala.util.Random +import java.io.IOException +import kafka.metrics.{KafkaTimer, KafkaMetricsGroup} +import java.util.concurrent.TimeUnit +import com.yammer.metrics.core.Gauge +import java.util.concurrent.atomic.AtomicInteger +import java.nio.channels.ClosedByInterruptException + + +object TestOffsetManager { + + val random = new Random + val SocketTimeoutMs = 10000 + + class StatsThread(reportingIntervalMs: Long, commitThreads: Seq[CommitThread], fetchThread: FetchThread) + extends ShutdownableThread("stats-thread") { + + def printStats() { + println("--------------------------------------------------------------------------------") + println("Aggregate stats for commits:") + println("Error count: %d; Max:%f; Min: %f; Mean: %f; Commit count: %d".format( + commitThreads.map(_.numErrors.get).sum, + commitThreads.map(_.timer.max()).max, + commitThreads.map(_.timer.min()).min, + commitThreads.map(_.timer.mean()).sum / commitThreads.size, + commitThreads.map(_.numCommits.get).sum)) + println("--------------------------------------------------------------------------------") + commitThreads.foreach(t => println(t.stats)) + println(fetchThread.stats) + } + + override def doWork() { + printStats() + Thread.sleep(reportingIntervalMs) + } + + } + + class CommitThread(id: Int, partitionCount: Int, commitIntervalMs: Long, zkClient: ZkClient) + extends ShutdownableThread("commit-thread") + with KafkaMetricsGroup { + + private val group = "group-" + id + private val metadata = "Metadata from commit thread " + id + private var offsetsChannel = ClientUtils.channelToOffsetManager(group, zkClient, SocketTimeoutMs) + private var offset = 0L + val numErrors = new AtomicInteger(0) + val numCommits = new AtomicInteger(0) + val timer = newTimer("commit-thread", TimeUnit.MILLISECONDS, TimeUnit.SECONDS) + private val commitTimer = new KafkaTimer(timer) + val shutdownLock = new Object + + private def ensureConnected() { + if (!offsetsChannel.isConnected) + offsetsChannel = ClientUtils.channelToOffsetManager(group, zkClient, SocketTimeoutMs) + } + + override def doWork() { + val commitRequest = OffsetCommitRequest(group, immutable.Map((1 to partitionCount).map(TopicAndPartition("topic-" + id, _) -> OffsetAndMetadata(offset, metadata)):_*)) + try { + ensureConnected() + offsetsChannel.send(commitRequest) + numCommits.getAndIncrement + commitTimer.time { + val response = OffsetCommitResponse.readFrom(offsetsChannel.receive().buffer) + if (response.commitStatus.exists(_._2 != ErrorMapping.NoError)) numErrors.getAndIncrement + } + offset += 1 + } + catch { + case e1: ClosedByInterruptException => + offsetsChannel.disconnect() + case e2: IOException => + println("Commit thread %d: Error while committing offsets to %s:%d for group %s due to %s.".format(id, offsetsChannel.host, offsetsChannel.port, group, e2)) + offsetsChannel.disconnect() + } + finally { + Thread.sleep(commitIntervalMs) + } + } + + override def shutdown() { + super.shutdown() + awaitShutdown() + offsetsChannel.disconnect() + println("Commit thread %d ended. 
Last committed offset: %d.".format(id, offset)) + } + + def stats = { + "Commit thread %d :: Error count: %d; Max:%f; Min: %f; Mean: %f; Commit count: %d" + .format(id, numErrors.get(), timer.max(), timer.min(), timer.mean(), numCommits.get()) + } + } + + class FetchThread(numGroups: Int, fetchIntervalMs: Long, zkClient: ZkClient) + extends ShutdownableThread("fetch-thread") + with KafkaMetricsGroup { + + private val timer = newTimer("fetch-thread", TimeUnit.MILLISECONDS, TimeUnit.SECONDS) + private val fetchTimer = new KafkaTimer(timer) + + private val channels = mutable.Map[Int, BlockingChannel]() + private var metadataChannel = ClientUtils.channelToAnyBroker(zkClient, SocketTimeoutMs) + + private val numErrors = new AtomicInteger(0) + + override def doWork() { + val id = random.nextInt().abs % numGroups + val group = "group-" + id + try { + metadataChannel.send(ConsumerMetadataRequest(group)) + val coordinatorId = ConsumerMetadataResponse.readFrom(metadataChannel.receive().buffer).coordinatorOpt.map(_.id).getOrElse(-1) + + val channel = if (channels.contains(coordinatorId)) + channels(coordinatorId) + else { + val newChannel = ClientUtils.channelToOffsetManager(group, zkClient, SocketTimeoutMs) + channels.put(coordinatorId, newChannel) + newChannel + } + + try { + // send the offset fetch request + val fetchRequest = OffsetFetchRequest(group, Seq(TopicAndPartition("topic-"+id, 1))) + channel.send(fetchRequest) + + fetchTimer.time { + val response = OffsetFetchResponse.readFrom(channel.receive().buffer) + if (response.requestInfo.exists(_._2.error != ErrorMapping.NoError)) { + numErrors.getAndIncrement + } + } + } + catch { + case e1: ClosedByInterruptException => + channel.disconnect() + channels.remove(coordinatorId) + case e2: IOException => + println("Error while fetching offset from %s:%d due to %s.".format(channel.host, channel.port, e2)) + channel.disconnect() + channels.remove(coordinatorId) + } + } + catch { + case e: IOException => + println("Error while querying %s:%d - shutting down query channel.".format(metadataChannel.host, metadataChannel.port)) + metadataChannel.disconnect() + println("Creating new query channel.") + metadataChannel = ClientUtils.channelToAnyBroker(zkClient, SocketTimeoutMs) + } + finally { + Thread.sleep(fetchIntervalMs) + } + + } + + override def shutdown() { + super.shutdown() + awaitShutdown() + channels.foreach(_._2.disconnect()) + metadataChannel.disconnect() + } + + def stats = { + "Fetch thread :: Error count: %d; Max:%f; Min: %f; Mean: %f; Fetch count: %d" + .format(numErrors.get(), timer.max(), timer.min(), timer.mean(), timer.count()) + } + } + + def main(args: Array[String]) { + val parser = new OptionParser + val zookeeperOpt = parser.accepts("zookeeper", "The ZooKeeper connection URL.") + .withRequiredArg + .describedAs("ZooKeeper URL") + .ofType(classOf[java.lang.String]) + .defaultsTo("localhost:2181") + + val commitIntervalOpt = parser.accepts("commit-interval-ms", "Offset commit interval.") + .withRequiredArg + .describedAs("interval") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(100) + + val fetchIntervalOpt = parser.accepts("fetch-interval-ms", "Offset fetch interval.") + .withRequiredArg + .describedAs("interval") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(1000) + + val numPartitionsOpt = parser.accepts("partition-count", "Number of partitions per commit.") + .withRequiredArg + .describedAs("interval") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(1) + + val numThreadsOpt = parser.accepts("thread-count", "Number 
of commit threads.") + .withRequiredArg + .describedAs("threads") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(1) + + val reportingIntervalOpt = parser.accepts("reporting-interval-ms", "Interval at which stats are reported.") + .withRequiredArg + .describedAs("interval (ms)") + .ofType(classOf[java.lang.Integer]) + .defaultsTo(3000) + + val helpOpt = parser.accepts("help", "Print this message.") + + val options = parser.parse(args : _*) + + if (options.has(helpOpt)) { + parser.printHelpOn(System.out) + System.exit(0) + } + + val commitIntervalMs = options.valueOf(commitIntervalOpt).intValue() + val fetchIntervalMs = options.valueOf(fetchIntervalOpt).intValue() + val threadCount = options.valueOf(numThreadsOpt).intValue() + val partitionCount = options.valueOf(numPartitionsOpt).intValue() + val zookeeper = options.valueOf(zookeeperOpt) + val reportingIntervalMs = options.valueOf(reportingIntervalOpt).intValue() + println("Commit thread count: %d; Partition count: %d, Commit interval: %d ms; Fetch interval: %d ms; Reporting interval: %d ms" + .format(threadCount, partitionCount, commitIntervalMs, fetchIntervalMs, reportingIntervalMs)) + + var zkClient: ZkClient = null + var commitThreads: Seq[CommitThread] = Seq() + var fetchThread: FetchThread = null + var statsThread: StatsThread = null + try { + zkClient = new ZkClient(zookeeper, 6000, 2000, ZKStringSerializer) + commitThreads = (0 to (threadCount-1)).map { threadId => + new CommitThread(threadId, partitionCount, commitIntervalMs, zkClient) + } + + fetchThread = new FetchThread(threadCount, fetchIntervalMs, zkClient) + + statsThread = new StatsThread(reportingIntervalMs, commitThreads, fetchThread) + + Runtime.getRuntime.addShutdownHook(new Thread() { + override def run() { + cleanShutdown() + statsThread.printStats() + } + }) + + commitThreads.foreach(_.start()) + + fetchThread.start() + + statsThread.start() + + commitThreads.foreach(_.join()) + fetchThread.join() + statsThread.join() + } + catch { + case e: Throwable => + println("Error: " + e) + } + finally { + cleanShutdown() + } + + def cleanShutdown() { + commitThreads.foreach(_.shutdown()) + commitThreads.foreach(_.join()) + if (fetchThread != null) { + fetchThread.shutdown() + fetchThread.join() + } + if (statsThread != null) { + statsThread.shutdown() + statsThread.join() + } + zkClient.close() + } + + } +} + diff --git a/core/src/test/scala/other/kafka/TestZKConsumerOffsets.scala b/core/src/test/scala/other/kafka/TestZKConsumerOffsets.scala deleted file mode 100644 index 31534ca303e91..0000000000000 --- a/core/src/test/scala/other/kafka/TestZKConsumerOffsets.scala +++ /dev/null @@ -1,73 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
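Note: the commit, fetch and stats threads added in TestOffsetManager above all rely on the doWork()/shutdown()/awaitShutdown() contract of kafka.utils.ShutdownableThread. A simplified stand-in for that contract (a sketch of the shape of the pattern, not the real ShutdownableThread) looks roughly like this:

```scala
import java.util.concurrent.CountDownLatch
import java.util.concurrent.atomic.AtomicBoolean

// Simplified sketch of a shutdownable polling thread: doWork() is called in a
// loop until shutdown() is requested, and awaitShutdown() blocks callers until
// the loop has actually exited.
abstract class PollingThreadSketch(name: String) extends Thread(name) {
  private val running = new AtomicBoolean(true)
  private val shutdownLatch = new CountDownLatch(1)

  def doWork(): Unit

  override def run() {
    try {
      while (running.get) doWork()
    } finally {
      shutdownLatch.countDown()
    }
  }

  def shutdown() {
    running.set(false)
    interrupt()            // wake the thread if doWork() is sleeping between iterations
  }

  def awaitShutdown() {
    shutdownLatch.await()
  }
}
```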
- */ - -package kafka - -import consumer._ -import utils.Utils -import java.util.concurrent.CountDownLatch - -object TestZKConsumerOffsets { - def main(args: Array[String]): Unit = { - if(args.length < 1) { - println("USAGE: " + TestZKConsumerOffsets.getClass.getName + " consumer.properties topic latest") - System.exit(1) - } - println("Starting consumer...") - val topic = args(1) - val autoOffsetReset = args(2) - val props = Utils.loadProps(args(0)) - props.put("auto.offset.reset", "largest") - - val config = new ConsumerConfig(props) - val consumerConnector: ConsumerConnector = Consumer.create(config) - val topicMessageStreams = consumerConnector.createMessageStreams(Predef.Map(topic -> 1)) - var threadList = List[ConsumerThread]() - for ((topic, streamList) <- topicMessageStreams) - for (stream <- streamList) - threadList ::= new ConsumerThread(stream) - - for (thread <- threadList) - thread.start - - // attach shutdown handler to catch control-c - Runtime.getRuntime().addShutdownHook(new Thread() { - override def run() = { - consumerConnector.shutdown - threadList.foreach(_.shutdown) - println("consumer threads shutted down") - } - }) - } -} - -private class ConsumerThread(stream: KafkaStream[Array[Byte], Array[Byte]]) extends Thread { - val shutdownLatch = new CountDownLatch(1) - - override def run() { - println("Starting consumer thread..") - for (messageAndMetadata <- stream) { - println("consumed: " + new String(messageAndMetadata.message, "UTF-8")) - } - shutdownLatch.countDown - println("thread shutdown !" ) - } - - def shutdown() { - shutdownLatch.await - } -} diff --git a/core/src/test/scala/unit/kafka/KafkaConfigTest.scala b/core/src/test/scala/unit/kafka/KafkaConfigTest.scala new file mode 100644 index 0000000000000..4d36b8b1173f6 --- /dev/null +++ b/core/src/test/scala/unit/kafka/KafkaConfigTest.scala @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
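Note: the KafkaConfigTest added in the next hunk traps System.exit by installing a SecurityManager whose checkExit throws, so argument-parsing failures surface as catchable exceptions instead of killing the test JVM. A minimal standalone sketch of that technique (the names below are illustrative, not the test's own):

```scala
import java.security.Permission

// Illustrative sketch: turn System.exit(status) into an exception so a test can
// assert that exit was attempted without terminating the test JVM.
class ExitCalledSketch(val status: Int) extends SecurityException("exit(" + status + ")")

class NoExitSecurityManagerSketch extends SecurityManager {
  override def checkExit(status: Int): Unit = throw new ExitCalledSketch(status)
  // Permit everything else so the code under test runs unchanged.
  override def checkPermission(perm: Permission): Unit = {}
  override def checkPermission(perm: Permission, context: Object): Unit = {}
}

object NoExitExample {
  def main(args: Array[String]) {
    val previous = System.getSecurityManager
    System.setSecurityManager(new NoExitSecurityManagerSketch)
    try {
      System.exit(1)                       // would normally kill the JVM
    } catch {
      case e: ExitCalledSketch => println("caught exit with status " + e.status)
    } finally {
      System.setSecurityManager(previous)  // always restore the original manager
    }
  }
}
```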
+ */ +package unit.kafka + +import java.io.{FileOutputStream, File} +import java.security.Permission + +import kafka.Kafka +import org.junit.{After, Before, Test} +import junit.framework.Assert._ + +class KafkaTest { + + val originalSecurityManager: SecurityManager = System.getSecurityManager + + class ExitCalled extends SecurityException { + } + + private class NoExitSecurityManager extends SecurityManager { + override def checkExit(status: Int): Unit = { + throw new ExitCalled + } + + override def checkPermission(perm : Permission): Unit = { + } + + override def checkPermission(perm : Permission, context: Object): Unit = { + } + } + + @Before + def setSecurityManager() : Unit = { + System.setSecurityManager(new NoExitSecurityManager) + } + + @After + def setOriginalSecurityManager() : Unit = { + System.setSecurityManager(originalSecurityManager) + } + + @Test + def testGetKafkaConfigFromArgs(): Unit = { + val propertiesFile = prepareDefaultConfig() + + // We should load configuration file without any arguments + val config1 = Kafka.getKafkaConfigFromArgs(Array(propertiesFile)) + assertEquals(1, config1.brokerId) + + // We should be able to override given property on command line + val config2 = Kafka.getKafkaConfigFromArgs(Array(propertiesFile, "--override", "broker.id=2")) + assertEquals(2, config2.brokerId) + + // We should be also able to set completely new property + val config3 = Kafka.getKafkaConfigFromArgs(Array(propertiesFile, "--override", "port=1987")) + assertEquals(1, config3.brokerId) + assertEquals(1987, config3.port) + + // We should be also able to set several properties + val config4 = Kafka.getKafkaConfigFromArgs(Array(propertiesFile, "--override", "port=1987", "--override", "broker.id=2")) + assertEquals(2, config4.brokerId) + assertEquals(1987, config4.port) + } + + @Test(expected = classOf[ExitCalled]) + def testGetKafkaConfigFromArgsWrongSetValue(): Unit = { + val propertiesFile = prepareDefaultConfig() + Kafka.getKafkaConfigFromArgs(Array(propertiesFile, "--override", "a=b=c")) + } + + @Test(expected = classOf[ExitCalled]) + def testGetKafkaConfigFromArgsNonArgsAtTheEnd(): Unit = { + val propertiesFile = prepareDefaultConfig() + Kafka.getKafkaConfigFromArgs(Array(propertiesFile, "--override", "broker.id=1", "broker.id=2")) + } + + @Test(expected = classOf[ExitCalled]) + def testGetKafkaConfigFromArgsNonArgsOnly(): Unit = { + val propertiesFile = prepareDefaultConfig() + Kafka.getKafkaConfigFromArgs(Array(propertiesFile, "broker.id=1", "broker.id=2")) + } + + @Test(expected = classOf[ExitCalled]) + def testGetKafkaConfigFromArgsNonArgsAtTheBegging(): Unit = { + val propertiesFile = prepareDefaultConfig() + Kafka.getKafkaConfigFromArgs(Array(propertiesFile, "broker.id=1", "--override", "broker.id=2")) + } + + def prepareDefaultConfig(): String = { + prepareConfig(Array("broker.id=1", "zookeeper.connect=somewhere")) + } + + def prepareConfig(lines : Array[String]): String = { + val file = File.createTempFile("kafkatest", ".properties") + file.deleteOnExit() + + val writer = new FileOutputStream(file) + lines.foreach { l => + writer.write(l.getBytes) + writer.write("\n".getBytes) + } + + writer.close + + file.getAbsolutePath + } +} diff --git a/core/src/test/scala/unit/kafka/admin/AddPartitionsTest.scala b/core/src/test/scala/unit/kafka/admin/AddPartitionsTest.scala index 115e20305a154..1bf2667f47853 100644 --- a/core/src/test/scala/unit/kafka/admin/AddPartitionsTest.scala +++ b/core/src/test/scala/unit/kafka/admin/AddPartitionsTest.scala @@ -37,10 +37,10 @@ class 
AddPartitionsTest extends JUnit3Suite with ZooKeeperTestHarness { val port3 = TestUtils.choosePort() val port4 = TestUtils.choosePort() - val configProps1 = TestUtils.createBrokerConfig(brokerId1, port1) - val configProps2 = TestUtils.createBrokerConfig(brokerId2, port2) - val configProps3 = TestUtils.createBrokerConfig(brokerId3, port3) - val configProps4 = TestUtils.createBrokerConfig(brokerId4, port4) + val configProps1 = TestUtils.createBrokerConfig(brokerId1, port1, false) + val configProps2 = TestUtils.createBrokerConfig(brokerId2, port2, false) + val configProps3 = TestUtils.createBrokerConfig(brokerId3, port3, false) + val configProps4 = TestUtils.createBrokerConfig(brokerId4, port4, false) var servers: Seq[KafkaServer] = Seq.empty[KafkaServer] var brokers: Seq[Broker] = Seq.empty[Broker] @@ -63,33 +63,11 @@ class AddPartitionsTest extends JUnit3Suite with ZooKeeperTestHarness { servers ++= List(server1, server2, server3, server4) brokers = servers.map(s => new Broker(s.config.brokerId, s.config.hostName, s.config.port)) - // create topics with 1 partition, 2 replicas, one on each broker - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic1, Map(0->Seq(0,1))) - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic2, Map(0->Seq(1,2))) - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic3, Map(0->Seq(2,3,0,1))) - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic4, Map(0->Seq(0,3))) - - - // wait until leader is elected - var leader1 = waitUntilLeaderIsElectedOrChanged(zkClient, topic1, partitionId, 500) - var leader2 = waitUntilLeaderIsElectedOrChanged(zkClient, topic2, partitionId, 500) - var leader3 = waitUntilLeaderIsElectedOrChanged(zkClient, topic3, partitionId, 500) - var leader4 = waitUntilLeaderIsElectedOrChanged(zkClient, topic4, partitionId, 500) - - debug("Leader for " + topic1 + " is elected to be: %s".format(leader1.getOrElse(-1))) - debug("Leader for " + topic2 + " is elected to be: %s".format(leader1.getOrElse(-1))) - debug("Leader for " + topic3 + "is elected to be: %s".format(leader1.getOrElse(-1))) - debug("Leader for " + topic4 + "is elected to be: %s".format(leader1.getOrElse(-1))) - - assertTrue("Leader should get elected", leader1.isDefined) - assertTrue("Leader should get elected", leader2.isDefined) - assertTrue("Leader should get elected", leader3.isDefined) - assertTrue("Leader should get elected", leader4.isDefined) - - assertTrue("Leader could be broker 0 or broker 1 for " + topic1, (leader1.getOrElse(-1) == 0) || (leader1.getOrElse(-1) == 1)) - assertTrue("Leader could be broker 1 or broker 2 for " + topic2, (leader2.getOrElse(-1) == 1) || (leader1.getOrElse(-1) == 2)) - assertTrue("Leader could be broker 2 or broker 3 for " + topic3, (leader3.getOrElse(-1) == 2) || (leader1.getOrElse(-1) == 3)) - assertTrue("Leader could be broker 3 or broker 4 for " + topic4, (leader4.getOrElse(-1) == 0) || (leader1.getOrElse(-1) == 3)) + // create topics first + createTopic(zkClient, topic1, partitionReplicaAssignment = Map(0->Seq(0,1)), servers = servers) + createTopic(zkClient, topic2, partitionReplicaAssignment = Map(0->Seq(1,2)), servers = servers) + createTopic(zkClient, topic3, partitionReplicaAssignment = Map(0->Seq(2,3,0,1)), servers = servers) + createTopic(zkClient, topic4, partitionReplicaAssignment = Map(0->Seq(0,3)), servers = servers) } override def tearDown() { @@ -121,16 +99,16 @@ class AddPartitionsTest extends JUnit3Suite with ZooKeeperTestHarness { def 
testIncrementPartitions { AdminUtils.addPartitions(zkClient, topic1, 3) // wait until leader is elected - var leader1 = waitUntilLeaderIsElectedOrChanged(zkClient, topic1, 1, 500) - var leader2 = waitUntilLeaderIsElectedOrChanged(zkClient, topic1, 2, 500) + var leader1 = waitUntilLeaderIsElectedOrChanged(zkClient, topic1, 1) + var leader2 = waitUntilLeaderIsElectedOrChanged(zkClient, topic1, 2) val leader1FromZk = ZkUtils.getLeaderForPartition(zkClient, topic1, 1).get val leader2FromZk = ZkUtils.getLeaderForPartition(zkClient, topic1, 2).get assertEquals(leader1.get, leader1FromZk) assertEquals(leader2.get, leader2FromZk) // read metadata from a broker and verify the new topic partitions exist - TestUtils.waitUntilMetadataIsPropagated(servers, topic1, 1, 1000) - TestUtils.waitUntilMetadataIsPropagated(servers, topic1, 2, 1000) + TestUtils.waitUntilMetadataIsPropagated(servers, topic1, 1) + TestUtils.waitUntilMetadataIsPropagated(servers, topic1, 2) val metadata = ClientUtils.fetchTopicMetadata(Set(topic1), brokers, "AddPartitionsTest-testIncrementPartitions", 2000,0).topicsMetadata val metaDataForTopic1 = metadata.filter(p => p.topic.equals(topic1)) @@ -146,16 +124,16 @@ class AddPartitionsTest extends JUnit3Suite with ZooKeeperTestHarness { def testManualAssignmentOfReplicas { AdminUtils.addPartitions(zkClient, topic2, 3, "1:2,0:1,2:3") // wait until leader is elected - var leader1 = waitUntilLeaderIsElectedOrChanged(zkClient, topic2, 1, 500) - var leader2 = waitUntilLeaderIsElectedOrChanged(zkClient, topic2, 2, 500) + var leader1 = waitUntilLeaderIsElectedOrChanged(zkClient, topic2, 1) + var leader2 = waitUntilLeaderIsElectedOrChanged(zkClient, topic2, 2) val leader1FromZk = ZkUtils.getLeaderForPartition(zkClient, topic2, 1).get val leader2FromZk = ZkUtils.getLeaderForPartition(zkClient, topic2, 2).get assertEquals(leader1.get, leader1FromZk) assertEquals(leader2.get, leader2FromZk) // read metadata from a broker and verify the new topic partitions exist - TestUtils.waitUntilMetadataIsPropagated(servers, topic2, 1, 1000) - TestUtils.waitUntilMetadataIsPropagated(servers, topic2, 2, 1000) + TestUtils.waitUntilMetadataIsPropagated(servers, topic2, 1) + TestUtils.waitUntilMetadataIsPropagated(servers, topic2, 2) val metadata = ClientUtils.fetchTopicMetadata(Set(topic2), brokers, "AddPartitionsTest-testManualAssignmentOfReplicas", 2000,0).topicsMetadata val metaDataForTopic2 = metadata.filter(p => p.topic.equals(topic2)) @@ -171,35 +149,14 @@ class AddPartitionsTest extends JUnit3Suite with ZooKeeperTestHarness { def testReplicaPlacement { AdminUtils.addPartitions(zkClient, topic3, 7) - // wait until leader is elected - var leader1 = waitUntilLeaderIsElectedOrChanged(zkClient, topic3, 1, 500) - var leader2 = waitUntilLeaderIsElectedOrChanged(zkClient, topic3, 2, 500) - var leader3 = waitUntilLeaderIsElectedOrChanged(zkClient, topic3, 3, 500) - var leader4 = waitUntilLeaderIsElectedOrChanged(zkClient, topic3, 4, 500) - var leader5 = waitUntilLeaderIsElectedOrChanged(zkClient, topic3, 5, 500) - var leader6 = waitUntilLeaderIsElectedOrChanged(zkClient, topic3, 6, 500) - - val leader1FromZk = ZkUtils.getLeaderForPartition(zkClient, topic3, 1).get - val leader2FromZk = ZkUtils.getLeaderForPartition(zkClient, topic3, 2).get - val leader3FromZk = ZkUtils.getLeaderForPartition(zkClient, topic3, 3).get - val leader4FromZk = ZkUtils.getLeaderForPartition(zkClient, topic3, 4).get - val leader5FromZk = ZkUtils.getLeaderForPartition(zkClient, topic3, 5).get - val leader6FromZk = 
ZkUtils.getLeaderForPartition(zkClient, topic3, 6).get - - assertEquals(leader1.get, leader1FromZk) - assertEquals(leader2.get, leader2FromZk) - assertEquals(leader3.get, leader3FromZk) - assertEquals(leader4.get, leader4FromZk) - assertEquals(leader5.get, leader5FromZk) - assertEquals(leader6.get, leader6FromZk) // read metadata from a broker and verify the new topic partitions exist - TestUtils.waitUntilMetadataIsPropagated(servers, topic3, 1, 1000) - TestUtils.waitUntilMetadataIsPropagated(servers, topic3, 2, 1000) - TestUtils.waitUntilMetadataIsPropagated(servers, topic3, 3, 1000) - TestUtils.waitUntilMetadataIsPropagated(servers, topic3, 4, 1000) - TestUtils.waitUntilMetadataIsPropagated(servers, topic3, 5, 1000) - TestUtils.waitUntilMetadataIsPropagated(servers, topic3, 6, 1000) + TestUtils.waitUntilMetadataIsPropagated(servers, topic3, 1) + TestUtils.waitUntilMetadataIsPropagated(servers, topic3, 2) + TestUtils.waitUntilMetadataIsPropagated(servers, topic3, 3) + TestUtils.waitUntilMetadataIsPropagated(servers, topic3, 4) + TestUtils.waitUntilMetadataIsPropagated(servers, topic3, 5) + TestUtils.waitUntilMetadataIsPropagated(servers, topic3, 6) val metadata = ClientUtils.fetchTopicMetadata(Set(topic3), brokers, "AddPartitionsTest-testReplicaPlacement", 2000,0).topicsMetadata @@ -248,4 +205,4 @@ class AddPartitionsTest extends JUnit3Suite with ZooKeeperTestHarness { assertEquals(partition6DataForTopic3.replicas(2).id, 2) assertEquals(partition6DataForTopic3.replicas(3).id, 3) } -} \ No newline at end of file +} diff --git a/core/src/test/scala/unit/kafka/admin/AdminTest.scala b/core/src/test/scala/unit/kafka/admin/AdminTest.scala index d5644ea40ec76..e28979827110d 100644 --- a/core/src/test/scala/unit/kafka/admin/AdminTest.scala +++ b/core/src/test/scala/unit/kafka/admin/AdminTest.scala @@ -145,7 +145,7 @@ class AdminTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { val expectedReplicaAssignment = Map(0 -> List(0, 1, 2)) val topic = "test" // create brokers - val servers = TestUtils.createBrokerConfigs(4).map(b => TestUtils.createServer(new KafkaConfig(b))) + val servers = TestUtils.createBrokerConfigs(4, false).map(b => TestUtils.createServer(new KafkaConfig(b))) // create the topic AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, expectedReplicaAssignment) // reassign partition 0 @@ -156,16 +156,18 @@ class AdminTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { assertTrue("Partition reassignment attempt failed for [test, 0]", reassignPartitionsCommand.reassignPartitions()) // wait until reassignment is completed TestUtils.waitUntilTrue(() => { - val partitionsBeingReassigned = ZkUtils.getPartitionsBeingReassigned(zkClient).mapValues(_.newReplicas); - ReassignPartitionsCommand.checkIfPartitionReassignmentSucceeded(zkClient, topicAndPartition, newReplicas, - Map(topicAndPartition -> newReplicas), partitionsBeingReassigned) == ReassignmentCompleted; - }, 1000) + val partitionsBeingReassigned = ZkUtils.getPartitionsBeingReassigned(zkClient).mapValues(_.newReplicas); + ReassignPartitionsCommand.checkIfPartitionReassignmentSucceeded(zkClient, topicAndPartition, newReplicas, + Map(topicAndPartition -> newReplicas), partitionsBeingReassigned) == ReassignmentCompleted; + }, + "Partition reassignment should complete") val assignedReplicas = ZkUtils.getReplicasForPartition(zkClient, topic, partitionToBeReassigned) // in sync replicas should not have any replica that is not in the new assigned replicas 
checkForPhantomInSyncReplicas(zkClient, topic, partitionToBeReassigned, assignedReplicas) assertEquals("Partition should have been reassigned to 0, 2, 3", newReplicas, assignedReplicas) ensureNoUnderReplicatedPartitions(zkClient, topic, partitionToBeReassigned, assignedReplicas, servers) - assertTrue(TestUtils.waitUntilTrue(() => getBrokersWithPartitionDir(servers, topic, 0) == newReplicas.toSet, 5000)) + TestUtils.waitUntilTrue(() => getBrokersWithPartitionDir(servers, topic, 0) == newReplicas.toSet, + "New replicas should exist on brokers") servers.foreach(_.shutdown()) } @@ -174,7 +176,7 @@ class AdminTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { val expectedReplicaAssignment = Map(0 -> List(0, 1, 2)) val topic = "test" // create brokers - val servers = TestUtils.createBrokerConfigs(4).map(b => TestUtils.createServer(new KafkaConfig(b))) + val servers = TestUtils.createBrokerConfigs(4, false).map(b => TestUtils.createServer(new KafkaConfig(b))) // create the topic AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, expectedReplicaAssignment) // reassign partition 0 @@ -185,15 +187,18 @@ class AdminTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { assertTrue("Partition reassignment failed for test, 0", reassignPartitionsCommand.reassignPartitions()) // wait until reassignment is completed TestUtils.waitUntilTrue(() => { - val partitionsBeingReassigned = ZkUtils.getPartitionsBeingReassigned(zkClient).mapValues(_.newReplicas); - ReassignPartitionsCommand.checkIfPartitionReassignmentSucceeded(zkClient, topicAndPartition, newReplicas, - Map(topicAndPartition -> newReplicas), partitionsBeingReassigned) == ReassignmentCompleted; - }, 1000) + val partitionsBeingReassigned = ZkUtils.getPartitionsBeingReassigned(zkClient).mapValues(_.newReplicas); + ReassignPartitionsCommand.checkIfPartitionReassignmentSucceeded(zkClient, topicAndPartition, newReplicas, + Map(topicAndPartition -> newReplicas), partitionsBeingReassigned) == ReassignmentCompleted; + }, + "Partition reassignment should complete") val assignedReplicas = ZkUtils.getReplicasForPartition(zkClient, topic, partitionToBeReassigned) assertEquals("Partition should have been reassigned to 0, 2, 3", newReplicas, assignedReplicas) checkForPhantomInSyncReplicas(zkClient, topic, partitionToBeReassigned, assignedReplicas) ensureNoUnderReplicatedPartitions(zkClient, topic, partitionToBeReassigned, assignedReplicas, servers) - assertTrue(TestUtils.waitUntilTrue(() => getBrokersWithPartitionDir(servers, topic, 0) == newReplicas.toSet, 5000)) + TestUtils.waitUntilTrue(() => getBrokersWithPartitionDir(servers, topic, 0) == newReplicas.toSet, + "New replicas should exist on brokers") + servers.foreach(_.shutdown()) } @@ -202,7 +207,7 @@ class AdminTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { val expectedReplicaAssignment = Map(0 -> List(0, 1)) val topic = "test" // create brokers - val servers = TestUtils.createBrokerConfigs(4).map(b => TestUtils.createServer(new KafkaConfig(b))) + val servers = TestUtils.createBrokerConfigs(4, false).map(b => TestUtils.createServer(new KafkaConfig(b))) // create the topic AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, expectedReplicaAssignment) // reassign partition 0 @@ -213,15 +218,17 @@ class AdminTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { assertTrue("Partition reassignment failed for test, 0", reassignPartitionsCommand.reassignPartitions()) // wait until reassignment is completed 
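Note: throughout AdminTest the TestUtils.waitUntilTrue calls now pass a failure message instead of a raw timeout, as in the call that follows. A rough sketch of a polling helper with that signature (the default timeout and pause values here are assumptions for illustration, not TestUtils' actual values):

```scala
// Illustrative polling helper: re-evaluate a condition until it holds or a
// default timeout elapses, failing with the supplied message on timeout.
object WaitUntilTrueSketch {
  def waitUntilTrue(condition: () => Boolean,
                    msg: String,
                    waitTimeMs: Long = 5000L,
                    pauseMs: Long = 100L): Unit = {
    val deadline = System.currentTimeMillis + waitTimeMs
    while (System.currentTimeMillis < deadline) {
      if (condition())
        return
      Thread.sleep(pauseMs)
    }
    throw new AssertionError(msg + " (condition not met within " + waitTimeMs + " ms)")
  }
}
```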
TestUtils.waitUntilTrue(() => { - val partitionsBeingReassigned = ZkUtils.getPartitionsBeingReassigned(zkClient).mapValues(_.newReplicas); - ReassignPartitionsCommand.checkIfPartitionReassignmentSucceeded(zkClient, topicAndPartition, newReplicas, - Map(topicAndPartition -> newReplicas), partitionsBeingReassigned) == ReassignmentCompleted; - }, 2000) + val partitionsBeingReassigned = ZkUtils.getPartitionsBeingReassigned(zkClient).mapValues(_.newReplicas); + ReassignPartitionsCommand.checkIfPartitionReassignmentSucceeded(zkClient, topicAndPartition, newReplicas, + Map(topicAndPartition -> newReplicas), partitionsBeingReassigned) == ReassignmentCompleted; + }, + "Partition reassignment should complete") val assignedReplicas = ZkUtils.getReplicasForPartition(zkClient, topic, partitionToBeReassigned) assertEquals("Partition should have been reassigned to 2, 3", newReplicas, assignedReplicas) checkForPhantomInSyncReplicas(zkClient, topic, partitionToBeReassigned, assignedReplicas) ensureNoUnderReplicatedPartitions(zkClient, topic, partitionToBeReassigned, assignedReplicas, servers) - assertTrue(TestUtils.waitUntilTrue(() => getBrokersWithPartitionDir(servers, topic, 0) == newReplicas.toSet, 5000)) + TestUtils.waitUntilTrue(() => getBrokersWithPartitionDir(servers, topic, 0) == newReplicas.toSet, + "New replicas should exist on brokers") servers.foreach(_.shutdown()) } @@ -229,7 +236,7 @@ class AdminTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { def testReassigningNonExistingPartition() { val topic = "test" // create brokers - val servers = TestUtils.createBrokerConfigs(4).map(b => TestUtils.createServer(new KafkaConfig(b))) + val servers = TestUtils.createBrokerConfigs(4, false).map(b => TestUtils.createServer(new KafkaConfig(b))) // reassign partition 0 val newReplicas = Seq(2, 3) val partitionToBeReassigned = 0 @@ -255,14 +262,18 @@ class AdminTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { val reassignPartitionsCommand = new ReassignPartitionsCommand(zkClient, Map(topicAndPartition -> newReplicas)) reassignPartitionsCommand.reassignPartitions // create brokers - val servers = TestUtils.createBrokerConfigs(2).map(b => TestUtils.createServer(new KafkaConfig(b))) - TestUtils.waitUntilTrue(() => checkIfReassignPartitionPathExists(zkClient), 1000) + val servers = TestUtils.createBrokerConfigs(2, false).map(b => TestUtils.createServer(new KafkaConfig(b))) + + // wait until reassignment completes + TestUtils.waitUntilTrue(() => !checkIfReassignPartitionPathExists(zkClient), + "Partition reassignment should complete") val assignedReplicas = ZkUtils.getReplicasForPartition(zkClient, topic, partitionToBeReassigned) assertEquals("Partition should have been reassigned to 0, 1", newReplicas, assignedReplicas) checkForPhantomInSyncReplicas(zkClient, topic, partitionToBeReassigned, assignedReplicas) // ensure that there are no under replicated partitions ensureNoUnderReplicatedPartitions(zkClient, topic, partitionToBeReassigned, assignedReplicas, servers) - assertTrue(TestUtils.waitUntilTrue(() => getBrokersWithPartitionDir(servers, topic, 0) == newReplicas.toSet, 5000)) + TestUtils.waitUntilTrue(() => getBrokersWithPartitionDir(servers, topic, 0) == newReplicas.toSet, + "New replicas should exist on brokers") servers.foreach(_.shutdown()) } @@ -287,16 +298,16 @@ class AdminTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { val partition = 1 val preferredReplica = 0 // create brokers - val serverConfigs = TestUtils.createBrokerConfigs(3).map(new 
KafkaConfig(_)) + val serverConfigs = TestUtils.createBrokerConfigs(3, false).map(new KafkaConfig(_)) // create the topic AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, expectedReplicaAssignment) val servers = serverConfigs.reverse.map(s => TestUtils.createServer(s)) // broker 2 should be the leader since it was started first - val currentLeader = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, partition, 1000, None).get + val currentLeader = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, partition, oldLeaderOpt = None).get // trigger preferred replica election val preferredReplicaElection = new PreferredReplicaLeaderElectionCommand(zkClient, Set(TopicAndPartition(topic, partition))) preferredReplicaElection.moveLeaderToPreferredReplica() - val newLeader = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, partition, 1000, Some(currentLeader)).get + val newLeader = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, partition, oldLeaderOpt = Some(currentLeader)).get assertEquals("Preferred replica election failed", preferredReplica, newLeader) servers.foreach(_.shutdown()) } @@ -307,11 +318,10 @@ class AdminTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { val topic = "test" val partition = 1 // create brokers - val serverConfigs = TestUtils.createBrokerConfigs(3).map(new KafkaConfig(_)) + val serverConfigs = TestUtils.createBrokerConfigs(3, false).map(new KafkaConfig(_)) val servers = serverConfigs.reverse.map(s => TestUtils.createServer(s)) // create the topic - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, expectedReplicaAssignment) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, partition, 1000) + TestUtils.createTopic(zkClient, topic, partitionReplicaAssignment = expectedReplicaAssignment, servers = servers) val controllerId = ZkUtils.getController(zkClient) val controller = servers.find(p => p.config.brokerId == controllerId).get.kafkaController @@ -319,10 +329,11 @@ class AdminTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { var activeServers = servers.filter(s => s.config.brokerId != 2) try { // wait for the update metadata request to trickle to the brokers - assertTrue("Topic test not created after timeout", TestUtils.waitUntilTrue(() => - activeServers.foldLeft(true)(_ && _.apis.metadataCache(TopicAndPartition(topic, partition)).leaderIsrAndControllerEpoch.leaderAndIsr.isr.size != 3), 1000)) + TestUtils.waitUntilTrue(() => + activeServers.foldLeft(true)(_ && _.apis.metadataCache.getPartitionInfo(topic,partition).get.leaderIsrAndControllerEpoch.leaderAndIsr.isr.size != 3), + "Topic test not created after timeout") assertEquals(0, partitionsRemaining.size) - var partitionStateInfo = activeServers.head.apis.metadataCache(TopicAndPartition(topic, partition)) + var partitionStateInfo = activeServers.head.apis.metadataCache.getPartitionInfo(topic,partition).get var leaderAfterShutdown = partitionStateInfo.leaderIsrAndControllerEpoch.leaderAndIsr.leader assertEquals(0, leaderAfterShutdown) assertEquals(2, partitionStateInfo.leaderIsrAndControllerEpoch.leaderAndIsr.isr.size) @@ -331,15 +342,15 @@ class AdminTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { partitionsRemaining = controller.shutdownBroker(1) assertEquals(0, partitionsRemaining.size) activeServers = servers.filter(s => s.config.brokerId == 0) - partitionStateInfo = activeServers.head.apis.metadataCache(TopicAndPartition(topic, partition)) + partitionStateInfo = 
activeServers.head.apis.metadataCache.getPartitionInfo(topic,partition).get leaderAfterShutdown = partitionStateInfo.leaderIsrAndControllerEpoch.leaderAndIsr.leader assertEquals(0, leaderAfterShutdown) - assertTrue(servers.foldLeft(true)(_ && _.apis.metadataCache(TopicAndPartition(topic, partition)).leaderIsrAndControllerEpoch.leaderAndIsr.leader == 0)) + assertTrue(servers.foldLeft(true)(_ && _.apis.metadataCache.getPartitionInfo(topic,partition).get.leaderIsrAndControllerEpoch.leaderAndIsr.leader == 0)) partitionsRemaining = controller.shutdownBroker(0) assertEquals(1, partitionsRemaining.size) // leader doesn't change since all the replicas are shut down - assertTrue(servers.foldLeft(true)(_ && _.apis.metadataCache(TopicAndPartition(topic, partition)).leaderIsrAndControllerEpoch.leaderAndIsr.leader == 0)) + assertTrue(servers.foldLeft(true)(_ && _.apis.metadataCache.getPartitionInfo(topic,partition).get.leaderIsrAndControllerEpoch.leaderAndIsr.leader == 0)) } finally { servers.foreach(_.shutdown()) diff --git a/core/src/test/scala/unit/kafka/admin/DeleteTopicTest.scala b/core/src/test/scala/unit/kafka/admin/DeleteTopicTest.scala index 974b057a88056..29cc01bcef9ca 100644 --- a/core/src/test/scala/unit/kafka/admin/DeleteTopicTest.scala +++ b/core/src/test/scala/unit/kafka/admin/DeleteTopicTest.scala @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -57,12 +57,12 @@ class DeleteTopicTest extends JUnit3Suite with ZooKeeperTestHarness { // start topic deletion AdminUtils.deleteTopic(zkClient, topic) // check if all replicas but the one that is shut down has deleted the log - assertTrue("Replicas 0,1 have not deleted log in 1000ms", TestUtils.waitUntilTrue(() => + TestUtils.waitUntilTrue(() => servers.filter(s => s.config.brokerId != follower.config.brokerId) - .foldLeft(true)((res, server) => res && server.getLogManager().getLog(topicAndPartition).isEmpty), 1000)) + .foldLeft(true)((res, server) => res && server.getLogManager().getLog(topicAndPartition).isEmpty), "Replicas 0,1 have not deleted log.") // ensure topic deletion is halted - assertTrue("Admin path /admin/delete_topic/test path deleted in 1000ms even when a follower replica is down", - TestUtils.waitUntilTrue(() => ZkUtils.pathExists(zkClient, ZkUtils.getDeleteTopicPath(topic)), 500)) + TestUtils.waitUntilTrue(() => ZkUtils.pathExists(zkClient, ZkUtils.getDeleteTopicPath(topic)), + "Admin path /admin/delete_topic/test path deleted even when a follower replica is down") // restart follower replica follower.startup() verifyTopicDeletion(topic, servers) @@ -74,161 +74,48 @@ class DeleteTopicTest extends JUnit3Suite with ZooKeeperTestHarness { val topicAndPartition = TopicAndPartition("test", 0) val topic = topicAndPartition.topic val servers = createTestTopicAndCluster(topic) + val controllerId = ZkUtils.getController(zkClient) + val controller = servers.filter(s => s.config.brokerId == controllerId).head + val leaderIdOpt = ZkUtils.getLeaderForPartition(zkClient, topic, 0) + val follower = servers.filter(s => s.config.brokerId != leaderIdOpt.get && s.config.brokerId != controllerId).last + follower.shutdown() + // start topic deletion AdminUtils.deleteTopic(zkClient, topic) // shut down the controller to 
trigger controller failover during delete topic - val controllerId = ZkUtils.getController(zkClient) - val controller = servers.filter(s => s.config.brokerId == controllerId).head controller.shutdown() + // ensure topic deletion is halted - assertTrue("Admin path /admin/delete_topic/test path deleted in 500ms even when a replica is down", - TestUtils.waitUntilTrue(() => ZkUtils.pathExists(zkClient, ZkUtils.getDeleteTopicPath(topic)), 500)) - // restart follower replica - controller.startup() - // wait until admin path for delete topic is deleted, signaling completion of topic deletion - assertTrue("Admin path /admin/delete_topic/test path not deleted in 4000ms even after a follower replica is restarted", - TestUtils.waitUntilTrue(() => !ZkUtils.pathExists(zkClient, ZkUtils.getDeleteTopicPath(topic)), 4000)) - assertTrue("Topic path /brokers/topics/test not deleted after /admin/delete_topic/test path is deleted", - TestUtils.waitUntilTrue(() => !ZkUtils.pathExists(zkClient, ZkUtils.getTopicPath(topic)), 100)) - // ensure that logs from all replicas are deleted if delete topic is marked successful in zookeeper - assertTrue("Replica logs not deleted after delete topic is complete", - servers.foldLeft(true)((res, server) => res && server.getLogManager().getLog(topicAndPartition).isEmpty)) - servers.foreach(_.shutdown()) - } + TestUtils.waitUntilTrue(() => ZkUtils.pathExists(zkClient, ZkUtils.getDeleteTopicPath(topic)), + "Admin path /admin/delete_topic/test path deleted even when a replica is down") - @Test - def testRequestHandlingDuringDeleteTopic() { - val topicAndPartition = TopicAndPartition("test", 0) - val topic = topicAndPartition.topic - val servers = createTestTopicAndCluster(topic) - // start topic deletion - AdminUtils.deleteTopic(zkClient, topic) - // shut down one follower replica - var leaderIdOpt = ZkUtils.getLeaderForPartition(zkClient, topic, 0) - assertTrue("Leader should exist for partition [test,0]", leaderIdOpt.isDefined) - val follower = servers.filter(s => s.config.brokerId != leaderIdOpt.get).last - follower.shutdown() - // test if produce requests are failed with UnknownTopicOrPartitionException during delete topic - val props1 = new Properties() - props1.put("metadata.broker.list", servers.map(s => s.config.hostName + ":" + s.config.port).mkString(",")) - props1.put("serializer.class", "kafka.serializer.StringEncoder") - props1.put("request.required.acks", "1") - val producerConfig1 = new ProducerConfig(props1) - val producer1 = new Producer[String, String](producerConfig1) - try{ - producer1.send(new KeyedMessage[String, String](topic, "test", "test1")) - fail("Test should fail because the topic is being deleted") - } catch { - case e: FailedToSendMessageException => - case oe: Throwable => fail("fails with exception", oe) - } finally { - producer1.close() - } - // test if fetch requests fail during delete topic - servers.filter(s => s.config.brokerId != follower.config.brokerId).foreach { server => - val consumer = new SimpleConsumer(server.config.hostName, server.config.port, 1000000, 64*1024, "") - val request = new FetchRequestBuilder() - .clientId("test-client") - .addFetch(topic, 0, 0, 10000) - .build() - val fetched = consumer.fetch(request) - val fetchResponse = fetched.data(topicAndPartition) - assertTrue("Fetch should fail with UnknownTopicOrPartitionCode", fetchResponse.error == ErrorMapping.UnknownTopicOrPartitionCode) - } - // test if offset requests fail during delete topic - servers.filter(s => s.config.brokerId != follower.config.brokerId).foreach { 
server => - val consumer = new SimpleConsumer(server.config.hostName, server.config.port, 1000000, 64*1024, "") - val offsetRequest = new OffsetRequest(Map(topicAndPartition -> new PartitionOffsetRequestInfo(OffsetRequest.LatestTime, 1))) - val offsetResponse = consumer.getOffsetsBefore(offsetRequest) - val errorCode = offsetResponse.partitionErrorAndOffsets(topicAndPartition).error - assertTrue("Offset request should fail with UnknownTopicOrPartitionCode", errorCode == ErrorMapping.UnknownTopicOrPartitionCode) - // test if offset fetch requests fail during delete topic - val offsetFetchRequest = new OffsetFetchRequest("test-group", Seq(topicAndPartition)) - val offsetFetchResponse = consumer.fetchOffsets(offsetFetchRequest) - val offsetFetchErrorCode = offsetFetchResponse.requestInfo(topicAndPartition).error - assertTrue("Offset fetch request should fail with UnknownTopicOrPartitionCode", - offsetFetchErrorCode == ErrorMapping.UnknownTopicOrPartitionCode) - // TODO: test if offset commit requests fail during delete topic - } - // restart follower replica + controller.startup() follower.startup() - verifyTopicDeletion(topic, servers) - servers.foreach(_.shutdown()) - } - @Test - def testPreferredReplicaElectionDuringDeleteTopic() { - val topicAndPartition = TopicAndPartition("test", 0) - val topic = topicAndPartition.topic - val servers = createTestTopicAndCluster(topic) - var leaderIdOpt = ZkUtils.getLeaderForPartition(zkClient, topic, 0) - assertTrue("Leader should exist for partition [test,0]", leaderIdOpt.isDefined) - // shut down the controller to move the leader to a non preferred replica before delete topic - val preferredReplicaId = 0 - val preferredReplica = servers.filter(s => s.config.brokerId == preferredReplicaId).head - preferredReplica.shutdown() - preferredReplica.startup() - val newLeaderIdOpt = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 3000, leaderIdOpt) - assertTrue("New leader should be elected prior to delete topic", newLeaderIdOpt.isDefined) - // start topic deletion - AdminUtils.deleteTopic(zkClient, topic) - // test preferred replica election - val preferredReplicaElection = new PreferredReplicaLeaderElectionCommand(zkClient, Set(topicAndPartition)) - preferredReplicaElection.moveLeaderToPreferredReplica() - val leaderAfterPreferredReplicaElectionOpt = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 1000, newLeaderIdOpt) - assertTrue("Preferred replica election should not move leader during delete topic", - leaderAfterPreferredReplicaElectionOpt.isEmpty || leaderAfterPreferredReplicaElectionOpt.get == newLeaderIdOpt.get) - val newControllerId = ZkUtils.getController(zkClient) - val newController = servers.filter(s => s.config.brokerId == newControllerId).head - assertFalse("Preferred replica election should fail", - newController.kafkaController.controllerContext.partitionsUndergoingPreferredReplicaElection.contains(topicAndPartition)) - verifyTopicDeletion(topic, servers) - servers.foreach(_.shutdown()) - } - - @Test - def testDeleteTopicDuringPreferredReplicaElection() { - val topic = "test" - val topicAndPartition = TopicAndPartition(topic, 0) - val servers = createTestTopicAndCluster(topic) - var leaderIdOpt = ZkUtils.getLeaderForPartition(zkClient, topic, 0) - assertTrue("Leader should exist for partition [test,0]", leaderIdOpt.isDefined) - // shut down the controller to move the leader to a non preferred replica before delete topic - val preferredReplicaId = 0 - val preferredReplica = servers.filter(s => s.config.brokerId 
== preferredReplicaId).head - preferredReplica.shutdown() - preferredReplica.startup() - val newLeaderIdOpt = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 3000, leaderIdOpt) - assertTrue("New leader should be elected prior to delete topic", newLeaderIdOpt.isDefined) - // test preferred replica election - val preferredReplicaElection = new PreferredReplicaLeaderElectionCommand(zkClient, Set(topicAndPartition)) - preferredReplicaElection.moveLeaderToPreferredReplica() - // start topic deletion during preferred replica election. This should halt topic deletion but eventually - // complete it successfully - AdminUtils.deleteTopic(zkClient, topic) - val newControllerId = ZkUtils.getController(zkClient) - val newController = servers.filter(s => s.config.brokerId == newControllerId).head - assertTrue("Preferred replica election should succeed after 1000ms", TestUtils.waitUntilTrue(() => - !newController.kafkaController.controllerContext.partitionsUndergoingPreferredReplicaElection.contains(topicAndPartition), 1000)) verifyTopicDeletion(topic, servers) servers.foreach(_.shutdown()) } @Test def testPartitionReassignmentDuringDeleteTopic() { - val expectedReplicaAssignment = Map(0 -> List(0, 1, 2)) + val expectedReplicaAssignment = Map(0 -> List(0, 1, 2)) val topic = "test" val topicAndPartition = TopicAndPartition(topic, 0) + val brokerConfigs = TestUtils.createBrokerConfigs(4, false) + brokerConfigs.foreach(p => p.setProperty("delete.topic.enable", "true")) // create brokers - val allServers = TestUtils.createBrokerConfigs(4).map(b => TestUtils.createServer(new KafkaConfig(b))) + val allServers = brokerConfigs.map(b => TestUtils.createServer(new KafkaConfig(b))) val servers = allServers.filter(s => expectedReplicaAssignment(0).contains(s.config.brokerId)) // create the topic AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, expectedReplicaAssignment) // wait until replica log is created on every broker - assertTrue("Replicas for topic test not created in 1000ms", TestUtils.waitUntilTrue(() => servers.foldLeft(true)((res, server) => - res && server.getLogManager().getLog(topicAndPartition).isDefined), 1000)) - var leaderIdOpt = ZkUtils.getLeaderForPartition(zkClient, topic, 0) + TestUtils.waitUntilTrue(() => servers.foldLeft(true)((res, server) => + res && server.getLogManager().getLog(topicAndPartition).isDefined), + "Replicas for topic test not created.") + val leaderIdOpt = ZkUtils.getLeaderForPartition(zkClient, topic, 0) assertTrue("Leader should exist for partition [test,0]", leaderIdOpt.isDefined) + val follower = servers.filter(s => s.config.brokerId != leaderIdOpt.get).last + follower.shutdown() // start topic deletion AdminUtils.deleteTopic(zkClient, topic) // start partition reassignment at the same time right after delete topic. 
In this case, reassignment will fail since @@ -243,70 +130,38 @@ class DeleteTopicTest extends JUnit3Suite with ZooKeeperTestHarness { val partitionsBeingReassigned = ZkUtils.getPartitionsBeingReassigned(zkClient).mapValues(_.newReplicas); ReassignPartitionsCommand.checkIfPartitionReassignmentSucceeded(zkClient, topicAndPartition, newReplicas, Map(topicAndPartition -> newReplicas), partitionsBeingReassigned) == ReassignmentFailed; - }, 1000) + }, "Partition reassignment shouldn't complete.") val controllerId = ZkUtils.getController(zkClient) val controller = servers.filter(s => s.config.brokerId == controllerId).head assertFalse("Partition reassignment should fail", controller.kafkaController.controllerContext.partitionsBeingReassigned.contains(topicAndPartition)) val assignedReplicas = ZkUtils.getReplicasForPartition(zkClient, topic, 0) assertEquals("Partition should not be reassigned to 0, 1, 2", oldAssignedReplicas, assignedReplicas) + follower.startup() verifyTopicDeletion(topic, servers) allServers.foreach(_.shutdown()) } - @Test - def testDeleteTopicDuringPartitionReassignment() { - val expectedReplicaAssignment = Map(0 -> List(0, 1, 2)) - val topic = "test" - val topicAndPartition = TopicAndPartition(topic, 0) - // create brokers - val allServers = TestUtils.createBrokerConfigs(4).map(b => TestUtils.createServer(new KafkaConfig(b))) - val servers = allServers.filter(s => expectedReplicaAssignment(0).contains(s.config.brokerId)) - // create the topic - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, expectedReplicaAssignment) - // wait until replica log is created on every broker - assertTrue("Replicas for topic test not created in 1000ms", TestUtils.waitUntilTrue(() => servers.foldLeft(true)((res, server) => - res && server.getLogManager().getLog(topicAndPartition).isDefined), 1000)) - var leaderIdOpt = ZkUtils.getLeaderForPartition(zkClient, topic, 0) - assertTrue("Leader should exist for partition [test,0]", leaderIdOpt.isDefined) - // start partition reassignment at the same time right before delete topic. 
In this case, reassignment will succeed - // reassign partition 0 - val newReplicas = Seq(1, 2, 3) - val reassignPartitionsCommand = new ReassignPartitionsCommand(zkClient, Map(topicAndPartition -> newReplicas)) - assertTrue("Partition reassignment failed for test, 0", reassignPartitionsCommand.reassignPartitions()) - // start topic deletion - AdminUtils.deleteTopic(zkClient, topic) - // wait until reassignment is completed - TestUtils.waitUntilTrue(() => { - val partitionsBeingReassigned = ZkUtils.getPartitionsBeingReassigned(zkClient).mapValues(_.newReplicas); - ReassignPartitionsCommand.checkIfPartitionReassignmentSucceeded(zkClient, topicAndPartition, newReplicas, - Map(topicAndPartition -> newReplicas), partitionsBeingReassigned) == ReassignmentCompleted; - }, 1000) - val controllerId = ZkUtils.getController(zkClient) - val controller = servers.filter(s => s.config.brokerId == controllerId).head - assertFalse("Partition reassignment should complete", - controller.kafkaController.controllerContext.partitionsBeingReassigned.contains(topicAndPartition)) - val assignedReplicas = ZkUtils.getReplicasForPartition(zkClient, topic, 0) - assertEquals("Partition should be reassigned to 1,2,3", newReplicas, assignedReplicas) - verifyTopicDeletion(topic, allServers) - allServers.foreach(_.shutdown()) - } - @Test def testDeleteTopicDuringAddPartition() { val topic = "test" val servers = createTestTopicAndCluster(topic) - // add partitions to topic - val topicAndPartition = TopicAndPartition(topic, 0) + val leaderIdOpt = ZkUtils.getLeaderForPartition(zkClient, topic, 0) + assertTrue("Leader should exist for partition [test,0]", leaderIdOpt.isDefined) + val follower = servers.filter(s => s.config.brokerId != leaderIdOpt.get).last val newPartition = TopicAndPartition(topic, 1) - AdminUtils.addPartitions(zkClient, topic, 2, "0:1:2,0:1:2") + follower.shutdown() + // add partitions to topic + AdminUtils.addPartitions(zkClient, topic, 2, "0:1:2,0:1:2", false) // start topic deletion AdminUtils.deleteTopic(zkClient, topic) + follower.startup() // test if topic deletion is resumed - verifyTopicDeletion(topic, servers) + verifyTopicDeletion(topic, servers) // verify that new partition doesn't exist on any broker either - assertTrue("Replica logs not for new partition [test,1] not deleted after delete topic is complete", TestUtils.waitUntilTrue(() => - servers.foldLeft(true)((res, server) => res && server.getLogManager().getLog(newPartition).isEmpty), 1000)) + TestUtils.waitUntilTrue(() => + servers.foldLeft(true)((res, server) => res && server.getLogManager().getLog(newPartition).isEmpty), + "Replica logs not for new partition [test,1] not deleted after delete topic is complete.") servers.foreach(_.shutdown()) } @@ -329,7 +184,7 @@ class DeleteTopicTest extends JUnit3Suite with ZooKeeperTestHarness { @Test def testRecreateTopicAfterDeletion() { - val expectedReplicaAssignment = Map(0 -> List(0, 1, 2)) + val expectedReplicaAssignment = Map(0 -> List(0, 1, 2)) val topic = "test" val topicAndPartition = TopicAndPartition(topic, 0) val servers = createTestTopicAndCluster(topic) @@ -342,52 +197,61 @@ class DeleteTopicTest extends JUnit3Suite with ZooKeeperTestHarness { val leaderIdOpt = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 1000) assertTrue("New leader should be elected after re-creating topic test", leaderIdOpt.isDefined) // check if all replica logs are created - assertTrue("Replicas for topic test not created in 1000ms", TestUtils.waitUntilTrue(() => servers.foldLeft(true)((res, 
server) => - res && server.getLogManager().getLog(topicAndPartition).isDefined), 1000)) + TestUtils.waitUntilTrue(() => servers.foldLeft(true)((res, server) => res && server.getLogManager().getLog(topicAndPartition).isDefined), + "Replicas for topic test not created.") servers.foreach(_.shutdown()) } @Test - def testTopicConfigChangesDuringDeleteTopic() { - val topic = "test" + def testDeleteNonExistingTopic() { + val topicAndPartition = TopicAndPartition("test", 0) + val topic = topicAndPartition.topic val servers = createTestTopicAndCluster(topic) - val topicConfigs = new Properties() - topicConfigs.put("segment.ms", "1000000") // start topic deletion - AdminUtils.deleteTopic(zkClient, topic) - verifyTopicDeletion(topic, servers) - // make topic config changes - try { - AdminUtils.changeTopicConfig(zkClient, topic, topicConfigs) - fail("Should fail with AdminOperationException for topic doesn't exist") - } catch { - case e: AdminOperationException => // expected - } + AdminUtils.deleteTopic(zkClient, "test2") + // verify delete topic path for test2 is removed from zookeeper + verifyTopicDeletion("test2", servers) + // verify that topic test is untouched + TestUtils.waitUntilTrue(() => servers.foldLeft(true)((res, server) => + res && server.getLogManager().getLog(topicAndPartition).isDefined), + "Replicas for topic test not created") + // test the topic path exists + assertTrue("Topic test mistakenly deleted", ZkUtils.pathExists(zkClient, ZkUtils.getTopicPath(topic))) + // topic test should have a leader + val leaderIdOpt = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 1000) + assertTrue("Leader should exist for topic test", leaderIdOpt.isDefined) servers.foreach(_.shutdown()) + } private def createTestTopicAndCluster(topic: String): Seq[KafkaServer] = { - val expectedReplicaAssignment = Map(0 -> List(0, 1, 2)) + val expectedReplicaAssignment = Map(0 -> List(0, 1, 2)) val topicAndPartition = TopicAndPartition(topic, 0) + val brokerConfigs = TestUtils.createBrokerConfigs(3, false) + brokerConfigs.foreach(p => p.setProperty("delete.topic.enable", "true")) // create brokers - val servers = TestUtils.createBrokerConfigs(3).map(b => TestUtils.createServer(new KafkaConfig(b))) + val servers = brokerConfigs.map(b => TestUtils.createServer(new KafkaConfig(b))) // create the topic AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, expectedReplicaAssignment) // wait until replica log is created on every broker - assertTrue("Replicas for topic test not created in 1000ms", TestUtils.waitUntilTrue(() => servers.foldLeft(true)((res, server) => - res && server.getLogManager().getLog(topicAndPartition).isDefined), 1000)) + TestUtils.waitUntilTrue(() => servers.foldLeft(true)((res, server) => + res && server.getLogManager().getLog(topicAndPartition).isDefined), + "Replicas for topic test not created") servers } private def verifyTopicDeletion(topic: String, servers: Seq[KafkaServer]) { val topicAndPartition = TopicAndPartition(topic, 0) // wait until admin path for delete topic is deleted, signaling completion of topic deletion - assertTrue("Admin path /admin/delete_topic/test path not deleted in 1000ms even after a replica is restarted", - TestUtils.waitUntilTrue(() => !ZkUtils.pathExists(zkClient, ZkUtils.getDeleteTopicPath(topic)), 1000)) - assertTrue("Topic path /brokers/topics/test not deleted after /admin/delete_topic/test path is deleted", - TestUtils.waitUntilTrue(() => !ZkUtils.pathExists(zkClient, ZkUtils.getTopicPath(topic)), 100)) + 
TestUtils.waitUntilTrue(() => !ZkUtils.pathExists(zkClient, ZkUtils.getDeleteTopicPath(topic)), + "Admin path /admin/delete_topic/test path not deleted even after a replica is restarted") + TestUtils.waitUntilTrue(() => !ZkUtils.pathExists(zkClient, ZkUtils.getTopicPath(topic)), + "Topic path /brokers/topics/test not deleted after /admin/delete_topic/test path is deleted") + // ensure that the topic-partition has been deleted from all brokers' replica managers + TestUtils.waitUntilTrue(() => servers.foldLeft(true)((res, server) => res && server.replicaManager.getPartition(topic, 0) == None), + "Replica manager's should have deleted all of this topic's partitions") // ensure that logs from all replicas are deleted if delete topic is marked successful in zookeeper assertTrue("Replica logs not deleted after delete topic is complete", servers.foldLeft(true)((res, server) => res && server.getLogManager().getLog(topicAndPartition).isEmpty)) } -} \ No newline at end of file +} diff --git a/core/src/test/scala/unit/kafka/admin/TopicCommandTest.scala b/core/src/test/scala/unit/kafka/admin/TopicCommandTest.scala new file mode 100644 index 0000000000000..ac6dd2087de45 --- /dev/null +++ b/core/src/test/scala/unit/kafka/admin/TopicCommandTest.scala @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package kafka.admin + +import junit.framework.Assert._ +import org.junit.Test +import org.scalatest.junit.JUnit3Suite +import kafka.utils.Logging +import kafka.utils.TestUtils +import kafka.zk.ZooKeeperTestHarness +import kafka.server.KafkaConfig +import kafka.admin.TopicCommand.TopicCommandOptions +import kafka.utils.ZkUtils + +class TopicCommandTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { + + @Test + def testConfigPreservationAcrossPartitionAlteration() { + val topic = "test" + val numPartitionsOriginal = 1 + val cleanupKey = "cleanup.policy" + val cleanupVal = "compact" + // create brokers + val brokers = List(0, 1, 2) + TestUtils.createBrokersInZk(zkClient, brokers) + // create the topic + val createOpts = new TopicCommandOptions(Array("--partitions", numPartitionsOriginal.toString, + "--replication-factor", "1", + "--config", cleanupKey + "=" + cleanupVal, + "--topic", topic)) + TopicCommand.createTopic(zkClient, createOpts) + val props = AdminUtils.fetchTopicConfig(zkClient, topic) + assertTrue("Properties after creation don't contain " + cleanupKey, props.containsKey(cleanupKey)) + assertTrue("Properties after creation have incorrect value", props.getProperty(cleanupKey).equals(cleanupVal)) + + // pre-create the topic config changes path to avoid a NoNodeException + ZkUtils.createPersistentPath(zkClient, ZkUtils.TopicConfigChangesPath) + + // modify the topic to add new partitions + val numPartitionsModified = 3 + val alterOpts = new TopicCommandOptions(Array("--partitions", numPartitionsModified.toString, + "--config", cleanupKey + "=" + cleanupVal, + "--topic", topic)) + TopicCommand.alterTopic(zkClient, alterOpts) + val newProps = AdminUtils.fetchTopicConfig(zkClient, topic) + assertTrue("Updated properties do not contain " + cleanupKey, newProps.containsKey(cleanupKey)) + assertTrue("Updated properties have incorrect value", newProps.getProperty(cleanupKey).equals(cleanupVal)) + } +} \ No newline at end of file diff --git a/core/src/test/scala/unit/kafka/api/RequestResponseSerializationTest.scala b/core/src/test/scala/unit/kafka/api/RequestResponseSerializationTest.scala index eb274d18c716f..cd16ced5465d0 100644 --- a/core/src/test/scala/unit/kafka/api/RequestResponseSerializationTest.scala +++ b/core/src/test/scala/unit/kafka/api/RequestResponseSerializationTest.scala @@ -23,12 +23,17 @@ import junit.framework.Assert._ import java.nio.ByteBuffer import kafka.message.{Message, ByteBufferMessageSet} import kafka.cluster.Broker -import collection.mutable._ -import kafka.common.{TopicAndPartition, ErrorMapping, OffsetMetadataAndError} +import kafka.common.{OffsetAndMetadata, ErrorMapping, OffsetMetadataAndError} +import kafka.utils.SystemTime +import org.apache.kafka.common.requests._ +import org.apache.kafka.common.protocol.ApiKeys +import scala.Some import kafka.controller.LeaderIsrAndControllerEpoch +import kafka.common.TopicAndPartition +import org.apache.kafka.common.TopicPartition -object SerializationTestUtils{ +object SerializationTestUtils { private val topic1 = "test1" private val topic2 = "test2" private val leader1 = 0 @@ -143,21 +148,29 @@ object SerializationTestUtils{ } def createTestTopicMetadataResponse: TopicMetadataResponse = { - new TopicMetadataResponse(Seq(topicmetaData1, topicmetaData2), 1) + new TopicMetadataResponse(brokers, Seq(topicmetaData1, topicmetaData2), 1) } - def createTestOffsetCommitRequest: OffsetCommitRequest = { + def createTestOffsetCommitRequestV1: OffsetCommitRequest = { new OffsetCommitRequest("group 1", 
collection.immutable.Map( - TopicAndPartition(topic1, 0) -> OffsetMetadataAndError(offset=42L, metadata="some metadata"), - TopicAndPartition(topic1, 1) -> OffsetMetadataAndError(offset=100L, metadata=OffsetMetadataAndError.NoMetadata) + TopicAndPartition(topic1, 0) -> OffsetAndMetadata(offset=42L, metadata="some metadata", timestamp=SystemTime.milliseconds), + TopicAndPartition(topic1, 1) -> OffsetAndMetadata(offset=100L, metadata=OffsetAndMetadata.NoMetadata, timestamp=SystemTime.milliseconds) )) } + def createTestOffsetCommitRequestV0: OffsetCommitRequest = { + new OffsetCommitRequest( + versionId = 0, + groupId = "group 1", + requestInfo = collection.immutable.Map( + TopicAndPartition(topic1, 0) -> OffsetAndMetadata(offset=42L, metadata="some metadata", timestamp=SystemTime.milliseconds), + TopicAndPartition(topic1, 1) -> OffsetAndMetadata(offset=100L, metadata=OffsetAndMetadata.NoMetadata, timestamp=SystemTime.milliseconds) + )) + } + def createTestOffsetCommitResponse: OffsetCommitResponse = { - new OffsetCommitResponse(collection.immutable.Map( - TopicAndPartition(topic1, 0) -> ErrorMapping.NoError, - TopicAndPartition(topic1, 1) -> ErrorMapping.UnknownTopicOrPartitionCode - )) + new OffsetCommitResponse(collection.immutable.Map(TopicAndPartition(topic1, 0) -> ErrorMapping.NoError, + TopicAndPartition(topic1, 1) -> ErrorMapping.NoError)) } def createTestOffsetFetchRequest: OffsetFetchRequest = { @@ -170,11 +183,39 @@ object SerializationTestUtils{ def createTestOffsetFetchResponse: OffsetFetchResponse = { new OffsetFetchResponse(collection.immutable.Map( TopicAndPartition(topic1, 0) -> OffsetMetadataAndError(42L, "some metadata", ErrorMapping.NoError), - TopicAndPartition(topic1, 1) -> OffsetMetadataAndError(100L, OffsetMetadataAndError.NoMetadata, - ErrorMapping.UnknownTopicOrPartitionCode) + TopicAndPartition(topic1, 1) -> OffsetMetadataAndError(100L, OffsetAndMetadata.NoMetadata, ErrorMapping.UnknownTopicOrPartitionCode) )) } + def createConsumerMetadataRequest: ConsumerMetadataRequest = { + ConsumerMetadataRequest("group 1", clientId = "client 1") + } + + def createConsumerMetadataResponse: ConsumerMetadataResponse = { + ConsumerMetadataResponse(Some(brokers.head), ErrorMapping.NoError) + } + + def createHeartbeatRequestAndHeader: HeartbeatRequestAndHeader = { + val body = new HeartbeatRequest("group1", 1, "consumer1") + HeartbeatRequestAndHeader(0.asInstanceOf[Short], 1, "", body) + } + + def createHeartbeatResponseAndHeader: HeartbeatResponseAndHeader = { + val body = new HeartbeatResponse(0.asInstanceOf[Short]) + HeartbeatResponseAndHeader(1, body) + } + + def createJoinGroupRequestAndHeader: JoinGroupRequestAndHeader = { + import scala.collection.JavaConversions._ + val body = new JoinGroupRequest("group1", 30000, List("topic1"), "consumer1", "strategy1"); + JoinGroupRequestAndHeader(0.asInstanceOf[Short], 1, "", body) + } + + def createJoinGroupResponseAndHeader: JoinGroupResponseAndHeader = { + import scala.collection.JavaConversions._ + val body = new JoinGroupResponse(0.asInstanceOf[Short], 1, "consumer1", List(new TopicPartition("test11", 1))) + JoinGroupResponseAndHeader(1, body) + } } class RequestResponseSerializationTest extends JUnitSuite { @@ -189,118 +230,41 @@ class RequestResponseSerializationTest extends JUnitSuite { private val offsetResponse = SerializationTestUtils.createTestOffsetResponse private val topicMetadataRequest = SerializationTestUtils.createTestTopicMetadataRequest private val topicMetadataResponse = 
SerializationTestUtils.createTestTopicMetadataResponse - private val offsetCommitRequest = SerializationTestUtils.createTestOffsetCommitRequest + private val offsetCommitRequestV0 = SerializationTestUtils.createTestOffsetCommitRequestV0 + private val offsetCommitRequestV1 = SerializationTestUtils.createTestOffsetCommitRequestV1 private val offsetCommitResponse = SerializationTestUtils.createTestOffsetCommitResponse private val offsetFetchRequest = SerializationTestUtils.createTestOffsetFetchRequest private val offsetFetchResponse = SerializationTestUtils.createTestOffsetFetchResponse - + private val consumerMetadataRequest = SerializationTestUtils.createConsumerMetadataRequest + private val consumerMetadataResponse = SerializationTestUtils.createConsumerMetadataResponse + private val consumerMetadataResponseNoCoordinator = ConsumerMetadataResponse(None, ErrorMapping.ConsumerCoordinatorNotAvailableCode) + private val heartbeatRequest = SerializationTestUtils.createHeartbeatRequestAndHeader + private val heartbeatResponse = SerializationTestUtils.createHeartbeatResponseAndHeader + private val joinGroupRequest = SerializationTestUtils.createJoinGroupRequestAndHeader + private val joinGroupResponse = SerializationTestUtils.createJoinGroupResponseAndHeader @Test def testSerializationAndDeserialization() { - var buffer: ByteBuffer = ByteBuffer.allocate(leaderAndIsrRequest.sizeInBytes()) - leaderAndIsrRequest.writeTo(buffer) - buffer.rewind() - val deserializedLeaderAndIsrRequest = LeaderAndIsrRequest.readFrom(buffer) - assertEquals("The original and deserialzed leaderAndISRRequest should be the same", leaderAndIsrRequest, - deserializedLeaderAndIsrRequest) - - buffer = ByteBuffer.allocate(leaderAndIsrResponse.sizeInBytes()) - leaderAndIsrResponse.writeTo(buffer) - buffer.rewind() - val deserializedLeaderAndIsrResponse = LeaderAndIsrResponse.readFrom(buffer) - assertEquals("The original and deserialzed leaderAndISRResponse should be the same", leaderAndIsrResponse, - deserializedLeaderAndIsrResponse) - - buffer = ByteBuffer.allocate(stopReplicaRequest.sizeInBytes()) - stopReplicaRequest.writeTo(buffer) - buffer.rewind() - val deserializedStopReplicaRequest = StopReplicaRequest.readFrom(buffer) - assertEquals("The original and deserialzed stopReplicaRequest should be the same", stopReplicaRequest, - deserializedStopReplicaRequest) - - buffer = ByteBuffer.allocate(stopReplicaResponse.sizeInBytes()) - stopReplicaResponse.writeTo(buffer) - buffer.rewind() - val deserializedStopReplicaResponse = StopReplicaResponse.readFrom(buffer) - assertEquals("The original and deserialzed stopReplicaResponse should be the same", stopReplicaResponse, - deserializedStopReplicaResponse) - - buffer = ByteBuffer.allocate(producerRequest.sizeInBytes) - producerRequest.writeTo(buffer) - buffer.rewind() - val deserializedProducerRequest = ProducerRequest.readFrom(buffer) - assertEquals("The original and deserialzed producerRequest should be the same", producerRequest, - deserializedProducerRequest) - - buffer = ByteBuffer.allocate(producerResponse.sizeInBytes) - producerResponse.writeTo(buffer) - buffer.rewind() - val deserializedProducerResponse = ProducerResponse.readFrom(buffer) - assertEquals("The original and deserialzed producerResponse should be the same: [%s], [%s]".format(producerResponse, deserializedProducerResponse), producerResponse, - deserializedProducerResponse) - - buffer = ByteBuffer.allocate(fetchRequest.sizeInBytes) - fetchRequest.writeTo(buffer) - buffer.rewind() - val deserializedFetchRequest = 
FetchRequest.readFrom(buffer) - assertEquals("The original and deserialzed fetchRequest should be the same", fetchRequest, - deserializedFetchRequest) - - buffer = ByteBuffer.allocate(offsetRequest.sizeInBytes) - offsetRequest.writeTo(buffer) - buffer.rewind() - val deserializedOffsetRequest = OffsetRequest.readFrom(buffer) - assertEquals("The original and deserialzed offsetRequest should be the same", offsetRequest, - deserializedOffsetRequest) - - buffer = ByteBuffer.allocate(offsetResponse.sizeInBytes) - offsetResponse.writeTo(buffer) - buffer.rewind() - val deserializedOffsetResponse = OffsetResponse.readFrom(buffer) - assertEquals("The original and deserialzed offsetResponse should be the same", offsetResponse, - deserializedOffsetResponse) - - buffer = ByteBuffer.allocate(topicMetadataRequest.sizeInBytes()) - topicMetadataRequest.writeTo(buffer) - buffer.rewind() - val deserializedTopicMetadataRequest = TopicMetadataRequest.readFrom(buffer) - assertEquals("The original and deserialzed topicMetadataRequest should be the same", topicMetadataRequest, - deserializedTopicMetadataRequest) - - buffer = ByteBuffer.allocate(topicMetadataResponse.sizeInBytes) - topicMetadataResponse.writeTo(buffer) - buffer.rewind() - val deserializedTopicMetadataResponse = TopicMetadataResponse.readFrom(buffer) - assertEquals("The original and deserialzed topicMetadataResponse should be the same", topicMetadataResponse, - deserializedTopicMetadataResponse) - - buffer = ByteBuffer.allocate(offsetCommitRequest.sizeInBytes) - offsetCommitRequest.writeTo(buffer) - buffer.rewind() - val deserializedOffsetCommitRequest = OffsetCommitRequest.readFrom(buffer) - assertEquals("The original and deserialzed offsetCommitRequest should be the same", offsetCommitRequest, - deserializedOffsetCommitRequest) - - buffer = ByteBuffer.allocate(offsetCommitResponse.sizeInBytes) - offsetCommitResponse.writeTo(buffer) - buffer.rewind() - val deserializedOffsetCommitResponse = OffsetCommitResponse.readFrom(buffer) - assertEquals("The original and deserialzed offsetCommitResponse should be the same", offsetCommitResponse, - deserializedOffsetCommitResponse) - - buffer = ByteBuffer.allocate(offsetFetchRequest.sizeInBytes) - offsetFetchRequest.writeTo(buffer) - buffer.rewind() - val deserializedOffsetFetchRequest = OffsetFetchRequest.readFrom(buffer) - assertEquals("The original and deserialzed offsetFetchRequest should be the same", offsetFetchRequest, - deserializedOffsetFetchRequest) - - buffer = ByteBuffer.allocate(offsetFetchResponse.sizeInBytes) - offsetFetchResponse.writeTo(buffer) - buffer.rewind() - val deserializedOffsetFetchResponse = OffsetFetchResponse.readFrom(buffer) - assertEquals("The original and deserialzed offsetFetchResponse should be the same", offsetFetchResponse, - deserializedOffsetFetchResponse) + val requestsAndResponses = + collection.immutable.Seq(leaderAndIsrRequest, leaderAndIsrResponse, stopReplicaRequest, + stopReplicaResponse, producerRequest, producerResponse, + fetchRequest, offsetRequest, offsetResponse, topicMetadataRequest, + topicMetadataResponse, offsetCommitRequestV0, offsetCommitRequestV1, + offsetCommitResponse, offsetFetchRequest, offsetFetchResponse, + consumerMetadataRequest, consumerMetadataResponse, + consumerMetadataResponseNoCoordinator, heartbeatRequest, + heartbeatResponse, joinGroupRequest, joinGroupResponse) + + requestsAndResponses.foreach { original => + val buffer = ByteBuffer.allocate(original.sizeInBytes) + original.writeTo(buffer) + buffer.rewind() + val deserializer = 
original.getClass.getDeclaredMethod("readFrom", classOf[ByteBuffer]) + val deserialized = deserializer.invoke(null, buffer) + assertFalse("All serialized bytes in " + original.getClass.getSimpleName + " should have been consumed", + buffer.hasRemaining) + assertEquals("The original and deserialized for " + original.getClass.getSimpleName + " should be the same.", original, deserialized) + } } } diff --git a/core/src/test/scala/unit/kafka/common/ConfigTest.scala b/core/src/test/scala/unit/kafka/common/ConfigTest.scala index 74118f4cbf7fe..7bff96caca1dc 100644 --- a/core/src/test/scala/unit/kafka/common/ConfigTest.scala +++ b/core/src/test/scala/unit/kafka/common/ConfigTest.scala @@ -29,7 +29,7 @@ class ConfigTest { @Test def testInvalidClientIds() { val invalidClientIds = new ArrayBuffer[String]() - val badChars = Array('/', '\\', ',', '\0', ':', "\"", '\'', ';', '*', '?', ' ', '\t', '\r', '\n', '=') + val badChars = Array('/', '\\', ',', '\u0000', ':', "\"", '\'', ';', '*', '?', ' ', '\t', '\r', '\n', '=') for (weirdChar <- badChars) { invalidClientIds += "Is" + weirdChar + "illegal" } @@ -59,7 +59,7 @@ class ConfigTest { @Test def testInvalidGroupIds() { val invalidGroupIds = new ArrayBuffer[String]() - val badChars = Array('/', '\\', ',', '\0', ':', "\"", '\'', ';', '*', '?', ' ', '\t', '\r', '\n', '=') + val badChars = Array('/', '\\', ',', '\u0000', ':', "\"", '\'', ';', '*', '?', ' ', '\t', '\r', '\n', '=') for (weirdChar <- badChars) { invalidGroupIds += "Is" + weirdChar + "illegal" } diff --git a/core/src/test/scala/unit/kafka/common/TopicTest.scala b/core/src/test/scala/unit/kafka/common/TopicTest.scala index c8f8f4d8715da..0fb25880c24ad 100644 --- a/core/src/test/scala/unit/kafka/common/TopicTest.scala +++ b/core/src/test/scala/unit/kafka/common/TopicTest.scala @@ -32,7 +32,7 @@ class TopicTest { for (i <- 1 to 6) longName += longName invalidTopicNames += longName - val badChars = Array('/', '\\', ',', '\0', ':', "\"", '\'', ';', '*', '?', ' ', '\t', '\r', '\n', '=') + val badChars = Array('/', '\\', ',', '\u0000', ':', "\"", '\'', ';', '*', '?', ' ', '\t', '\r', '\n', '=') for (weirdChar <- badChars) { invalidTopicNames += "Is" + weirdChar + "illegal" } diff --git a/core/src/test/scala/unit/kafka/consumer/ConsumerIteratorTest.scala b/core/src/test/scala/unit/kafka/consumer/ConsumerIteratorTest.scala index 9347ea60f2c02..c0355cc0135c6 100644 --- a/core/src/test/scala/unit/kafka/consumer/ConsumerIteratorTest.scala +++ b/core/src/test/scala/unit/kafka/consumer/ConsumerIteratorTest.scala @@ -20,7 +20,6 @@ package kafka.consumer import java.util.concurrent._ import java.util.concurrent.atomic._ -import java.util.Properties import scala.collection._ import junit.framework.Assert._ @@ -28,7 +27,6 @@ import kafka.message._ import kafka.server._ import kafka.utils.TestUtils._ import kafka.utils._ -import kafka.admin.AdminUtils import org.junit.Test import kafka.serializer._ import kafka.cluster.{Broker, Cluster} @@ -61,8 +59,7 @@ class ConsumerIteratorTest extends JUnit3Suite with KafkaServerTestHarness { override def setUp() { super.setUp - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(0 -> Seq(configs.head.brokerId)), new Properties) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) + createTopic(zkClient, topic, partitionReplicaAssignment = Map(0 -> Seq(configs.head.brokerId)), servers = servers) } @Test @@ -83,7 +80,7 @@ class ConsumerIteratorTest extends JUnit3Suite with KafkaServerTestHarness { val receivedMessages = (0 until 5).map(i => 
iter.next.message).toList assertFalse(iter.hasNext) - assertEquals(1, queue.size) // This is only the shutdown command. + assertEquals(0, queue.size) // Shutdown command has been consumed. assertEquals(5, receivedMessages.size) val unconsumed = messageSet.filter(_.offset >= consumedOffset).map(m => Utils.readString(m.message.payload)) assertEquals(unconsumed, receivedMessages) diff --git a/core/src/test/scala/unit/kafka/consumer/PartitionAssignorTest.scala b/core/src/test/scala/unit/kafka/consumer/PartitionAssignorTest.scala new file mode 100644 index 0000000000000..24954de66ccc5 --- /dev/null +++ b/core/src/test/scala/unit/kafka/consumer/PartitionAssignorTest.scala @@ -0,0 +1,207 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package unit.kafka.consumer + +import org.scalatest.junit.JUnit3Suite +import org.easymock.EasyMock +import org.I0Itec.zkclient.ZkClient +import org.apache.zookeeper.data.Stat +import kafka.consumer._ +import kafka.utils.{TestUtils, Logging, ZkUtils, Json} +import junit.framework.Assert._ +import kafka.common.TopicAndPartition +import unit.kafka.consumer.PartitionAssignorTest.StaticSubscriptionInfo +import kafka.consumer.ConsumerThreadId +import unit.kafka.consumer.PartitionAssignorTest.Scenario +import unit.kafka.consumer.PartitionAssignorTest.WildcardSubscriptionInfo + +class PartitionAssignorTest extends JUnit3Suite with Logging { + + def testRoundRobinPartitionAssignor() { + val assignor = new RoundRobinAssignor + + /** various scenarios with only wildcard consumers */ + (1 to PartitionAssignorTest.TestCaseCount).foreach (testCase => { + val consumerCount = 1.max(TestUtils.random.nextInt(PartitionAssignorTest.MaxConsumerCount + 1)) + val topicCount = PartitionAssignorTest.MinTopicCount.max(TestUtils.random.nextInt(PartitionAssignorTest.MaxTopicCount + 1)) + + val topicPartitionCounts = Map((1 to topicCount).map(topic => { + ("topic-" + topic, PartitionAssignorTest.MinPartitionCount.max(TestUtils.random.nextInt(PartitionAssignorTest.MaxPartitionCount))) + }).toSeq:_*) + + val subscriptions = Map((1 to consumerCount).map(consumer => { + val streamCount = 1.max(TestUtils.random.nextInt(PartitionAssignorTest.MaxStreamCount + 1)) + ("g1c" + consumer, WildcardSubscriptionInfo(streamCount, ".*", isWhitelist = true)) + }).toSeq:_*) + val scenario = Scenario("g1", topicPartitionCounts, subscriptions) + val zkClient = PartitionAssignorTest.setupZkClientMock(scenario) + EasyMock.replay(zkClient) + PartitionAssignorTest.assignAndVerify(scenario, assignor, zkClient, verifyAssignmentIsUniform = true) + }) + } + + def testRangePartitionAssignor() { + val assignor = new RangeAssignor + (1 to PartitionAssignorTest.TestCaseCount).foreach (testCase => { + val consumerCount = 
1.max(TestUtils.random.nextInt(PartitionAssignorTest.MaxConsumerCount + 1)) + val topicCount = PartitionAssignorTest.MinTopicCount.max(TestUtils.random.nextInt(PartitionAssignorTest.MaxTopicCount + 1)) + + val topicPartitionCounts = Map((1 to topicCount).map(topic => { + ("topic-" + topic, PartitionAssignorTest.MinPartitionCount.max(TestUtils.random.nextInt(PartitionAssignorTest.MaxPartitionCount))) + }).toSeq:_*) + + val subscriptions = Map((1 to consumerCount).map(consumer => { + val streamCounts = Map((1 to topicCount).map(topic => { + val streamCount = 1.max(TestUtils.random.nextInt(PartitionAssignorTest.MaxStreamCount + 1)) + ("topic-" + topic, streamCount) + }).toSeq:_*) + ("g1c" + consumer, StaticSubscriptionInfo(streamCounts)) + }).toSeq:_*) + val scenario = Scenario("g1", topicPartitionCounts, subscriptions) + val zkClient = PartitionAssignorTest.setupZkClientMock(scenario) + EasyMock.replay(zkClient) + + PartitionAssignorTest.assignAndVerify(scenario, assignor, zkClient) + }) + } +} + +private object PartitionAssignorTest extends Logging { + + private val TestCaseCount = 3 + private val MaxConsumerCount = 10 + private val MaxStreamCount = 8 + private val MaxTopicCount = 100 + private val MinTopicCount = 0 + private val MaxPartitionCount = 120 + private val MinPartitionCount = 8 + + private trait SubscriptionInfo { + def registrationString: String + } + + private case class StaticSubscriptionInfo(streamCounts: Map[String, Int]) extends SubscriptionInfo { + def registrationString = + Json.encode(Map("version" -> 1, + "subscription" -> streamCounts, + "pattern" -> "static", + "timestamp" -> 1234.toString)) + + override def toString = { + "Stream counts: " + streamCounts + } + } + + private case class WildcardSubscriptionInfo(streamCount: Int, regex: String, isWhitelist: Boolean) + extends SubscriptionInfo { + def registrationString = + Json.encode(Map("version" -> 1, + "subscription" -> Map(regex -> streamCount), + "pattern" -> (if (isWhitelist) "white_list" else "black_list"))) + + override def toString = { + "\"%s\":%d (%s)".format(regex, streamCount, if (isWhitelist) "whitelist" else "blacklist") + } + } + + private case class Scenario(group: String, + topicPartitionCounts: Map[String, Int], + /* consumerId -> SubscriptionInfo */ + subscriptions: Map[String, SubscriptionInfo]) { + override def toString = { + "\n" + + "Group : %s\n".format(group) + + "Topic partition counts : %s\n".format(topicPartitionCounts) + + "Consumer subscriptions : %s\n".format(subscriptions) + } + } + + private def setupZkClientMock(scenario: Scenario) = { + val consumers = java.util.Arrays.asList(scenario.subscriptions.keys.toSeq:_*) + + val zkClient = EasyMock.createStrictMock(classOf[ZkClient]) + EasyMock.checkOrder(zkClient, false) + + EasyMock.expect(zkClient.getChildren("/consumers/%s/ids".format(scenario.group))).andReturn(consumers) + EasyMock.expectLastCall().anyTimes() + + scenario.subscriptions.foreach { case(consumerId, subscriptionInfo) => + EasyMock.expect(zkClient.readData("/consumers/%s/ids/%s".format(scenario.group, consumerId), new Stat())) + .andReturn(subscriptionInfo.registrationString) + EasyMock.expectLastCall().anyTimes() + } + + scenario.topicPartitionCounts.foreach { case(topic, partitionCount) => + val replicaAssignment = Map((0 until partitionCount).map(partition => (partition.toString, Seq(0))):_*) + EasyMock.expect(zkClient.readData("/brokers/topics/%s".format(topic), new Stat())) + .andReturn(ZkUtils.replicaAssignmentZkData(replicaAssignment)) + 
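The wildcard scenarios above are checked with verifyAssignmentIsUniform = true: when every stream carries the same subscription, partition counts per stream may differ by at most one. A small self-contained sketch of that property using a plain modulo round-robin deal; it is illustrative only and not Kafka's RoundRobinAssignor:

    object RoundRobinSketch {
      // Deal sorted partitions out to sorted streams in turn.
      def assign(partitions: Seq[String], streams: Seq[String]): Map[String, Seq[String]] = {
        val sortedStreams = streams.sorted.toIndexedSeq
        partitions.sorted.zipWithIndex
          .groupBy { case (_, i) => sortedStreams(i % sortedStreams.size) }
          .map { case (stream, owned) => stream -> owned.map(_._1) }
      }

      def main(args: Array[String]): Unit = {
        val partitions = for (t <- 1 to 3; p <- 0 until 4) yield s"topic-$t/$p"
        val assignment = assign(partitions, Seq("c1-0", "c2-0", "c3-0", "c4-0", "c5-0"))
        val counts = assignment.values.map(_.size)
        // Same uniformity condition the test asserts: max and min owned counts differ by at most one.
        assert(counts.max - counts.min <= 1, "assignment is not uniform")
      }
    }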
EasyMock.expectLastCall().anyTimes() + } + + EasyMock.expect(zkClient.getChildren("/brokers/topics")).andReturn( + java.util.Arrays.asList(scenario.topicPartitionCounts.keys.toSeq:_*)) + EasyMock.expectLastCall().anyTimes() + + zkClient + } + + private def assignAndVerify(scenario: Scenario, assignor: PartitionAssignor, zkClient: ZkClient, + verifyAssignmentIsUniform: Boolean = false) { + val assignments = scenario.subscriptions.map{ case(consumer, subscription) => + val ctx = new AssignmentContext("g1", consumer, excludeInternalTopics = true, zkClient) + assignor.assign(ctx) + } + + // check for uniqueness (i.e., any partition should be assigned to exactly one consumer stream) + val globalAssignment = collection.mutable.Map[TopicAndPartition, ConsumerThreadId]() + assignments.foreach(assignment => { + assignment.foreach { case(topicPartition, owner) => + val previousOwnerOpt = globalAssignment.put(topicPartition, owner) + assertTrue("Scenario %s: %s is assigned to two owners.".format(scenario, topicPartition), previousOwnerOpt.isEmpty) + } + }) + + // check for coverage (i.e., all given partitions are owned) + val assignedPartitions = globalAssignment.keySet + val givenPartitions = scenario.topicPartitionCounts.flatMap{ case (topic, partitionCount) => + (0 until partitionCount).map(partition => TopicAndPartition(topic, partition)) + }.toSet + assertTrue("Scenario %s: the list of given partitions and assigned partitions are different.".format(scenario), + givenPartitions == assignedPartitions) + + // check for uniform assignment + if (verifyAssignmentIsUniform) { + val partitionCountForStream = partitionCountPerStream(globalAssignment) + val maxCount = partitionCountForStream.valuesIterator.max + val minCount = partitionCountForStream.valuesIterator.min + assertTrue("Scenario %s: assignment is not uniform (partition counts per stream are in the range [%d, %d])" + .format(scenario, minCount, maxCount), (maxCount - minCount) <= 1) + } + } + + /** For each consumer stream, count the number of partitions that it owns. 
*/ + private def partitionCountPerStream(assignment: collection.Map[TopicAndPartition, ConsumerThreadId]) = { + val ownedCounts = collection.mutable.Map[ConsumerThreadId, Int]() + assignment.foreach { case (topicPartition, owner) => + val updatedCount = ownedCounts.getOrElse(owner, 0) + 1 + ownedCounts.put(owner, updatedCount) + } + ownedCounts + } +} + diff --git a/core/src/test/scala/unit/kafka/consumer/TopicFilterTest.scala b/core/src/test/scala/unit/kafka/consumer/TopicFilterTest.scala index cf2724bb68d39..4f124af5c3e94 100644 --- a/core/src/test/scala/unit/kafka/consumer/TopicFilterTest.scala +++ b/core/src/test/scala/unit/kafka/consumer/TopicFilterTest.scala @@ -21,6 +21,7 @@ package kafka.consumer import junit.framework.Assert._ import org.scalatest.junit.JUnitSuite import org.junit.Test +import kafka.server.OffsetManager class TopicFilterTest extends JUnitSuite { @@ -29,19 +30,56 @@ class TopicFilterTest extends JUnitSuite { def testWhitelists() { val topicFilter1 = new Whitelist("white1,white2") - assertTrue(topicFilter1.isTopicAllowed("white2")) - assertFalse(topicFilter1.isTopicAllowed("black1")) + assertTrue(topicFilter1.isTopicAllowed("white2", excludeInternalTopics = true)) + assertTrue(topicFilter1.isTopicAllowed("white2", excludeInternalTopics = false)) + assertFalse(topicFilter1.isTopicAllowed("black1", excludeInternalTopics = true)) + assertFalse(topicFilter1.isTopicAllowed("black1", excludeInternalTopics = false)) val topicFilter2 = new Whitelist(".+") - assertTrue(topicFilter2.isTopicAllowed("alltopics")) - + assertTrue(topicFilter2.isTopicAllowed("alltopics", excludeInternalTopics = true)) + assertFalse(topicFilter2.isTopicAllowed(OffsetManager.OffsetsTopicName, excludeInternalTopics = true)) + assertTrue(topicFilter2.isTopicAllowed(OffsetManager.OffsetsTopicName, excludeInternalTopics = false)) + val topicFilter3 = new Whitelist("white_listed-topic.+") - assertTrue(topicFilter3.isTopicAllowed("white_listed-topic1")) - assertFalse(topicFilter3.isTopicAllowed("black1")) + assertTrue(topicFilter3.isTopicAllowed("white_listed-topic1", excludeInternalTopics = true)) + assertFalse(topicFilter3.isTopicAllowed("black1", excludeInternalTopics = true)) + + val topicFilter4 = new Whitelist("test-(?!bad\\b)[\\w]+") + assertTrue(topicFilter4.isTopicAllowed("test-good", excludeInternalTopics = true)) + assertFalse(topicFilter4.isTopicAllowed("test-bad", excludeInternalTopics = true)) } @Test def testBlacklists() { val topicFilter1 = new Blacklist("black1") + assertTrue(topicFilter1.isTopicAllowed("white2", excludeInternalTopics = true)) + assertTrue(topicFilter1.isTopicAllowed("white2", excludeInternalTopics = false)) + assertFalse(topicFilter1.isTopicAllowed("black1", excludeInternalTopics = true)) + assertFalse(topicFilter1.isTopicAllowed("black1", excludeInternalTopics = false)) + + assertFalse(topicFilter1.isTopicAllowed(OffsetManager.OffsetsTopicName, excludeInternalTopics = true)) + assertTrue(topicFilter1.isTopicAllowed(OffsetManager.OffsetsTopicName, excludeInternalTopics = false)) } + + @Test + def testWildcardTopicCountGetTopicCountMapEscapeJson() { + def getTopicCountMapKey(regex: String): String = { + val topicCount = new WildcardTopicCount(null, "consumerId", new Whitelist(regex), 1, true) + topicCount.getTopicCountMap.head._1 + } + //lets make sure that the JSON strings are escaping as we expect + //if they are not then when they get saved to zookeeper and read back out they will be broken on parse + assertEquals("-\\\"-", getTopicCountMapKey("-\"-")) + 
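The escaping assertions here pin down how a wildcard regex is escaped before it is stored as a JSON map key in ZooKeeper: quotes, backslashes, and slashes in the pattern must survive the round trip or the registration will break on parse. A tiny sketch of that kind of escaping, using an assumed escape helper rather than Kafka's actual Json encoder:

    object JsonEscapeSketch {
      // Escape characters that would corrupt a JSON string value.
      def escape(raw: String): String =
        raw.flatMap {
          case '"'  => "\\\""   // an embedded quote would otherwise terminate the JSON string
          case '\\' => "\\\\"   // a lone backslash must be doubled
          case '/'  => "\\/"    // a forward slash may also be escaped
          case c    => c.toString
        }

      def main(args: Array[String]): Unit = {
        assert(escape("-\"-") == "-\\\"-")
        assert(escape("-\\-") == "-\\\\-")
        assert(escape("-/-") == "-\\/-")
      }
    }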
assertEquals("-\\\\-", getTopicCountMapKey("-\\-")) + assertEquals("-\\/-", getTopicCountMapKey("-/-")) + assertEquals("-\\\\b-", getTopicCountMapKey("-\\b-")) + assertEquals("-\\\\f-", getTopicCountMapKey("-\\f-")) + assertEquals("-\\\\n-", getTopicCountMapKey("-\\n-")) + assertEquals("-\\\\r-", getTopicCountMapKey("-\\r-")) + assertEquals("-\\\\t-", getTopicCountMapKey("-\\t-")) + assertEquals("-\\\\u0000-", getTopicCountMapKey("-\\u0000-")) + assertEquals("-\\\\u001f-", getTopicCountMapKey("-\\u001f-")) + assertEquals("-\\\\u007f-", getTopicCountMapKey("-\\u007f-")) + assertEquals("-\\\\u009f-", getTopicCountMapKey("-\\u009f-")) + } } \ No newline at end of file diff --git a/core/src/test/scala/unit/kafka/consumer/ZookeeperConsumerConnectorTest.scala b/core/src/test/scala/unit/kafka/consumer/ZookeeperConsumerConnectorTest.scala index 8fe7259c9abd2..a17e8532c44aa 100644 --- a/core/src/test/scala/unit/kafka/consumer/ZookeeperConsumerConnectorTest.scala +++ b/core/src/test/scala/unit/kafka/consumer/ZookeeperConsumerConnectorTest.scala @@ -1,4 +1,3 @@ - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -6,7 +5,7 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -20,18 +19,20 @@ package kafka.consumer import junit.framework.Assert._ import kafka.integration.KafkaServerTestHarness +import kafka.javaapi.consumer.ConsumerRebalanceListener import kafka.server._ import scala.collection._ +import scala.collection.JavaConversions._ import org.scalatest.junit.JUnit3Suite import kafka.message._ import kafka.serializer._ -import kafka.admin.AdminUtils import org.I0Itec.zkclient.ZkClient import kafka.utils._ -import kafka.producer.{ProducerConfig, KeyedMessage, Producer} +import kafka.producer.{KeyedMessage, Producer} import java.util.{Collections, Properties} import org.apache.log4j.{Logger, Level} import kafka.utils.TestUtils._ +import kafka.common.{TopicAndPartition, MessageStreamsExistException} class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHarness with Logging { @@ -90,15 +91,15 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar zkConsumerConnector0.shutdown // send some messages to each broker - val sentMessages1 = sendMessagesToBrokerPartition(configs.head, topic, 0, nMessages) ++ - sendMessagesToBrokerPartition(configs.last, topic, 1, nMessages) + val sentMessages1 = sendMessagesToPartition(configs, topic, 0, nMessages) ++ + sendMessagesToPartition(configs, topic, 1, nMessages) // wait to make sure the topic and partition have a leader for the successful case - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1, 500) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 1, 1000) + TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0) + TestUtils.waitUntilMetadataIsPropagated(servers, topic, 1) // create a consumer val consumerConfig1 = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, group, 
consumer1)) @@ -115,7 +116,7 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar assertEquals(expected_1, actual_1) // commit consumed offsets - zkConsumerConnector1.commitOffsets + zkConsumerConnector1.commitOffsets(true) // create a consumer val consumerConfig2 = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, group, consumer2)) { @@ -124,11 +125,11 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar val zkConsumerConnector2 = new ZookeeperConsumerConnector(consumerConfig2, true) val topicMessageStreams2 = zkConsumerConnector2.createMessageStreams(Map(topic -> 1), new StringDecoder(), new StringDecoder()) // send some messages to each broker - val sentMessages2 = sendMessagesToBrokerPartition(configs.head, topic, 0, nMessages) ++ - sendMessagesToBrokerPartition(configs.last, topic, 1, nMessages) + val sentMessages2 = sendMessagesToPartition(configs, topic, 0, nMessages) ++ + sendMessagesToPartition(configs, topic, 1, nMessages) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1, 500) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1) val receivedMessages2 = getMessages(nMessages, topicMessageStreams1) ++ getMessages(nMessages, topicMessageStreams2) assertEquals(sentMessages2.sorted, receivedMessages2.sorted) @@ -145,11 +146,11 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar val zkConsumerConnector3 = new ZookeeperConsumerConnector(consumerConfig3, true) val topicMessageStreams3 = zkConsumerConnector3.createMessageStreams(new mutable.HashMap[String, Int]()) // send some messages to each broker - val sentMessages3 = sendMessagesToBrokerPartition(configs.head, topic, 0, nMessages) ++ - sendMessagesToBrokerPartition(configs.last, topic, 1, nMessages) + val sentMessages3 = sendMessagesToPartition(configs, topic, 0, nMessages) ++ + sendMessagesToPartition(configs, topic, 1, nMessages) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1, 500) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1) val receivedMessages3 = getMessages(nMessages, topicMessageStreams1) ++ getMessages(nMessages, topicMessageStreams2) assertEquals(sentMessages3.sorted, receivedMessages3.sorted) @@ -158,6 +159,14 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar val actual_3 = getZKChildrenValues(dirs.consumerOwnerDir) assertEquals(expected_2, actual_3) + // call createMesssageStreams twice should throw MessageStreamsExistException + try { + val topicMessageStreams4 = zkConsumerConnector3.createMessageStreams(new mutable.HashMap[String, Int]()) + fail("Should fail with MessageStreamsExistException") + } catch { + case e: MessageStreamsExistException => // expected + } + zkConsumerConnector1.shutdown zkConsumerConnector2.shutdown zkConsumerConnector3.shutdown @@ -165,19 +174,20 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar requestHandlerLogger.setLevel(Level.ERROR) } + def testCompression() { val requestHandlerLogger = Logger.getLogger(classOf[kafka.server.KafkaRequestHandler]) requestHandlerLogger.setLevel(Level.FATAL) // send some messages to each broker - val sentMessages1 = sendMessagesToBrokerPartition(configs.head, topic, 0, nMessages, GZIPCompressionCodec) ++ - 
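The new try/catch above asserts that a second call to createMessageStreams on the same connector raises MessageStreamsExistException instead of silently rebuilding the streams. A hypothetical sketch of that one-shot guard; the class and exception below are stand-ins, not the real ZookeeperConsumerConnector:

    class OneShotStreams[T](build: () => T) {
      private val created = new java.util.concurrent.atomic.AtomicBoolean(false)

      def createMessageStreams(): T = {
        // Only the first caller flips the flag; every later call fails fast.
        if (!created.compareAndSet(false, true))
          throw new IllegalStateException("message streams already created")  // stands in for MessageStreamsExistException
        build()
      }
    }

    object OneShotStreamsExample {
      def main(args: Array[String]): Unit = {
        val connector = new OneShotStreams(() => List("stream-0"))
        connector.createMessageStreams()        // first call succeeds
        try {
          connector.createMessageStreams()      // second call must fail
          sys.error("expected the second call to fail")
        } catch {
          case _: IllegalStateException => // expected, mirrors the test's catch of MessageStreamsExistException
        }
      }
    }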
sendMessagesToBrokerPartition(configs.last, topic, 1, nMessages, GZIPCompressionCodec) + val sentMessages1 = sendMessagesToPartition(configs, topic, 0, nMessages, GZIPCompressionCodec) ++ + sendMessagesToPartition(configs, topic, 1, nMessages, GZIPCompressionCodec) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1, 500) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 1, 1000) + TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0) + TestUtils.waitUntilMetadataIsPropagated(servers, topic, 1) // create a consumer val consumerConfig1 = new ConsumerConfig( @@ -194,7 +204,7 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar assertEquals(expected_1, actual_1) // commit consumed offsets - zkConsumerConnector1.commitOffsets + zkConsumerConnector1.commitOffsets(true) // create a consumer val consumerConfig2 = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, group, consumer2)) { @@ -203,11 +213,11 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar val zkConsumerConnector2 = new ZookeeperConsumerConnector(consumerConfig2, true) val topicMessageStreams2 = zkConsumerConnector2.createMessageStreams(Map(topic -> 1), new StringDecoder(), new StringDecoder()) // send some messages to each broker - val sentMessages2 = sendMessagesToBrokerPartition(configs.head, topic, 0, nMessages, GZIPCompressionCodec) ++ - sendMessagesToBrokerPartition(configs.last, topic, 1, nMessages, GZIPCompressionCodec) + val sentMessages2 = sendMessagesToPartition(configs, topic, 0, nMessages, GZIPCompressionCodec) ++ + sendMessagesToPartition(configs, topic, 1, nMessages, GZIPCompressionCodec) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1, 500) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1) val receivedMessages2 = getMessages(nMessages, topicMessageStreams1) ++ getMessages(nMessages, topicMessageStreams2) assertEquals(sentMessages2.sorted, receivedMessages2.sorted) @@ -224,11 +234,11 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar val zkConsumerConnector3 = new ZookeeperConsumerConnector(consumerConfig3, true) val topicMessageStreams3 = zkConsumerConnector3.createMessageStreams(new mutable.HashMap[String, Int](), new StringDecoder(), new StringDecoder()) // send some messages to each broker - val sentMessages3 = sendMessagesToBrokerPartition(configs.head, topic, 0, nMessages, GZIPCompressionCodec) ++ - sendMessagesToBrokerPartition(configs.last, topic, 1, nMessages, GZIPCompressionCodec) + val sentMessages3 = sendMessagesToPartition(configs, topic, 0, nMessages, GZIPCompressionCodec) ++ + sendMessagesToPartition(configs, topic, 1, nMessages, GZIPCompressionCodec) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1, 500) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1) val receivedMessages3 = getMessages(nMessages, topicMessageStreams1) ++ getMessages(nMessages, topicMessageStreams2) assertEquals(sentMessages3.sorted, receivedMessages3.sorted) @@ -246,11 +256,11 @@ class ZookeeperConsumerConnectorTest 
extends JUnit3Suite with KafkaServerTestHar def testCompressionSetConsumption() { // send some messages to each broker - val sentMessages = sendMessagesToBrokerPartition(configs.head, topic, 0, 200, DefaultCompressionCodec) ++ - sendMessagesToBrokerPartition(configs.last, topic, 1, 200, DefaultCompressionCodec) + val sentMessages = sendMessagesToPartition(configs, topic, 0, 200, DefaultCompressionCodec) ++ + sendMessagesToPartition(configs, topic, 1, 200, DefaultCompressionCodec) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 1, 1000) + TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0) + TestUtils.waitUntilMetadataIsPropagated(servers, topic, 1) val consumerConfig1 = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, group, consumer0)) val zkConsumerConnector1 = new ZookeeperConsumerConnector(consumerConfig1, true) @@ -272,16 +282,16 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar requestHandlerLogger.setLevel(Level.FATAL) // send some messages to each broker - val sentMessages = sendMessagesToBrokerPartition(configs.head, topic, 0, nMessages, NoCompressionCodec) ++ - sendMessagesToBrokerPartition(configs.last, topic, 1, nMessages, NoCompressionCodec) + val sentMessages = sendMessagesToPartition(configs, topic, 0, nMessages, NoCompressionCodec) ++ + sendMessagesToPartition(configs, topic, 1, nMessages, NoCompressionCodec) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 1, 1000) + TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0) + TestUtils.waitUntilMetadataIsPropagated(servers, topic, 1) val consumerConfig = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, group, consumer1)) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1, 500) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1) val zkConsumerConnector = new ZookeeperConsumerConnector(consumerConfig, true) @@ -310,12 +320,10 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar val zkClient = new ZkClient(zookeeperConnect, 6000, 30000, ZKStringSerializer) // create topic topic1 with 1 partition on broker 0 - AdminUtils.createTopic(zkClient, topic, 1, 1) + createTopic(zkClient, topic, numPartitions = 1, replicationFactor = 1, servers = servers) // send some messages to each broker - val sentMessages1 = sendMessages(configs.head, nMessages, "batch1", NoCompressionCodec, 1) - - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000) + val sentMessages1 = sendMessages(configs, topic, "producer1", nMessages, "batch1", NoCompressionCodec, 1) // create a consumer val consumerConfig1 = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, group, consumer1)) @@ -339,70 +347,48 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar zkClient.close() } - def sendMessagesToBrokerPartition(config: KafkaConfig, - topic: String, - partition: Int, - numMessages: Int, - compression: CompressionCodec = NoCompressionCodec): List[String] = { - val header = "test-%d-%d".format(config.brokerId, partition) - val props = new Properties() - props.put("metadata.broker.list", TestUtils.getBrokerListStrFromConfigs(configs)) - props.put("partitioner.class", "kafka.utils.FixedValuePartitioner") - props.put("compression.codec", 
compression.codec.toString) - props.put("key.serializer.class", classOf[IntEncoder].getName.toString) - props.put("serializer.class", classOf[StringEncoder].getName.toString) - val producer: Producer[Int, String] = new Producer[Int, String](new ProducerConfig(props)) - val ms = 0.until(numMessages).map(x => header + config.brokerId + "-" + partition + "-" + x) - producer.send(ms.map(m => new KeyedMessage[Int, String](topic, partition, m)):_*) - debug("Sent %d messages to broker %d for partition [%s,%d]".format(ms.size, config.brokerId, topic, partition)) - producer.close() - ms.toList - } + def testConsumerRebalanceListener() { + // Send messages to create topic + sendMessagesToPartition(configs, topic, 0, nMessages) + sendMessagesToPartition(configs, topic, 1, nMessages) - def sendMessages(config: KafkaConfig, - messagesPerNode: Int, - header: String, - compression: CompressionCodec, - numParts: Int): List[String]= { - var messages: List[String] = Nil - val props = new Properties() - props.put("metadata.broker.list", TestUtils.getBrokerListStrFromConfigs(configs)) - props.put("partitioner.class", "kafka.utils.FixedValuePartitioner") - props.put("key.serializer.class", classOf[IntEncoder].getName.toString) - props.put("serializer.class", classOf[StringEncoder].getName) - val producer: Producer[Int, String] = new Producer[Int, String](new ProducerConfig(props)) - for (partition <- 0 until numParts) { - val ms = 0.until(messagesPerNode).map(x => header + config.brokerId + "-" + partition + "-" + x) - producer.send(ms.map(m => new KeyedMessage[Int, String](topic, partition, m)):_*) - messages ++= ms - debug("Sent %d messages to broker %d for partition [%s,%d]".format(ms.size, config.brokerId, topic, partition)) - } - producer.close() - messages - } + val consumerConfig1 = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, group, consumer1)) + val zkConsumerConnector1 = new ZookeeperConsumerConnector(consumerConfig1, true) + // Register consumer rebalance listener + val rebalanceListener1 = new TestConsumerRebalanceListener() + zkConsumerConnector1.setConsumerRebalanceListener(rebalanceListener1) + val topicMessageStreams1 = zkConsumerConnector1.createMessageStreams(Map(topic -> 1), new StringDecoder(), new StringDecoder()) - def sendMessages(messagesPerNode: Int, header: String, compression: CompressionCodec = NoCompressionCodec): List[String]= { - var messages: List[String] = Nil - for(conf <- configs) - messages ++= sendMessages(conf, messagesPerNode, header, compression, numParts) - messages - } + // Check if rebalance listener is fired + assertEquals(true, rebalanceListener1.listenerCalled) + assertEquals(null, rebalanceListener1.partitionOwnership.get(topic)) + // reset the flag + rebalanceListener1.listenerCalled = false - def getMessages(nMessagesPerThread: Int, - topicMessageStreams: Map[String,List[KafkaStream[String, String]]]): List[String]= { - var messages: List[String] = Nil - for((topic, messageStreams) <- topicMessageStreams) { - for (messageStream <- messageStreams) { - val iterator = messageStream.iterator - for(i <- 0 until nMessagesPerThread) { - assertTrue(iterator.hasNext) - val message = iterator.next.message - messages ::= message - debug("received message: " + message) - } - } - } - messages.reverse + val actual_1 = getZKChildrenValues(dirs.consumerOwnerDir) + val expected_1 = List(("0", "group1_consumer1-0"), + ("1", "group1_consumer1-0")) + assertEquals(expected_1, actual_1) + + val consumerConfig2 = new 
ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, group, consumer2)) + val zkConsumerConnector2 = new ZookeeperConsumerConnector(consumerConfig2, true) + // Register consumer rebalance listener + val rebalanceListener2 = new TestConsumerRebalanceListener() + zkConsumerConnector2.setConsumerRebalanceListener(rebalanceListener2) + val topicMessageStreams2 = zkConsumerConnector2.createMessageStreams(Map(topic -> 1), new StringDecoder(), new StringDecoder()) + + val actual_2 = getZKChildrenValues(dirs.consumerOwnerDir) + val expected_2 = List(("0", "group1_consumer1-0"), + ("1", "group1_consumer2-0")) + assertEquals(expected_2, actual_2) + + // Check if rebalance listener is fired + assertEquals(true, rebalanceListener1.listenerCalled) + assertEquals(Set[Int](0, 1), rebalanceListener1.partitionOwnership.get(topic)) + assertEquals(true, rebalanceListener2.listenerCalled) + assertEquals(null, rebalanceListener2.partitionOwnership.get(topic)) + zkConsumerConnector1.shutdown() + zkConsumerConnector2.shutdown() } def getZKChildrenValues(path : String) : Seq[Tuple2[String,String]] = { @@ -416,6 +402,14 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar (partition, zkClient.readData(path + "/" + partition).asInstanceOf[String])) } -} + private class TestConsumerRebalanceListener extends ConsumerRebalanceListener { + var listenerCalled: Boolean = false + var partitionOwnership: java.util.Map[String, java.util.Set[java.lang.Integer]] = null + override def beforeReleasingPartitions(partitionOwnership: java.util.Map[String, java.util.Set[java.lang.Integer]]) { + listenerCalled = true + this.partitionOwnership = partitionOwnership + } + } +} diff --git a/core/src/test/scala/unit/kafka/integration/AutoOffsetResetTest.scala b/core/src/test/scala/unit/kafka/integration/AutoOffsetResetTest.scala index e5703bc16b2ce..95303e098d40c 100644 --- a/core/src/test/scala/unit/kafka/integration/AutoOffsetResetTest.scala +++ b/core/src/test/scala/unit/kafka/integration/AutoOffsetResetTest.scala @@ -17,22 +17,25 @@ package kafka.integration -import junit.framework.Assert._ import kafka.utils.{ZKGroupTopicDirs, Logging} import kafka.consumer.{ConsumerTimeoutException, ConsumerConfig, ConsumerConnector, Consumer} import kafka.server._ -import org.apache.log4j.{Level, Logger} -import org.scalatest.junit.JUnit3Suite import kafka.utils.TestUtils import kafka.serializer._ import kafka.producer.{Producer, KeyedMessage} +import org.junit.Test +import org.apache.log4j.{Level, Logger} +import org.scalatest.junit.JUnit3Suite +import junit.framework.Assert._ + class AutoOffsetResetTest extends JUnit3Suite with KafkaServerTestHarness with Logging { + val configs = List(new KafkaConfig(TestUtils.createBrokerConfig(0))) + val topic = "test_topic" val group = "default_group" val testConsumer = "consumer" - val configs = List(new KafkaConfig(TestUtils.createBrokerConfig(0))) val NumMessages = 10 val LargeOffset = 10000 val SmallOffset = -1 @@ -50,37 +53,40 @@ class AutoOffsetResetTest extends JUnit3Suite with KafkaServerTestHarness with L requestHandlerLogger.setLevel(Level.ERROR) super.tearDown } - - def testResetToEarliestWhenOffsetTooHigh() = + + @Test + def testResetToEarliestWhenOffsetTooHigh() = assertEquals(NumMessages, resetAndConsume(NumMessages, "smallest", LargeOffset)) - + + @Test def testResetToEarliestWhenOffsetTooLow() = assertEquals(NumMessages, resetAndConsume(NumMessages, "smallest", SmallOffset)) + @Test def testResetToLatestWhenOffsetTooHigh() = assertEquals(0, 
resetAndConsume(NumMessages, "largest", LargeOffset)) + @Test def testResetToLatestWhenOffsetTooLow() = assertEquals(0, resetAndConsume(NumMessages, "largest", SmallOffset)) - + /* Produce the given number of messages, create a consumer with the given offset policy, * then reset the offset to the given value and consume until we get no new messages. * Returns the count of messages received. */ def resetAndConsume(numMessages: Int, resetTo: String, offset: Long): Int = { - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 1000) + TestUtils.createTopic(zkClient, topic, 1, 1, servers) - val producer: Producer[String, Array[Byte]] = TestUtils.createProducer(TestUtils.getBrokerListStrFromConfigs(configs), - new DefaultEncoder(), new StringEncoder()) + val producer: Producer[String, Array[Byte]] = TestUtils.createProducer( + TestUtils.getBrokerListStrFromConfigs(configs), + keyEncoder = classOf[StringEncoder].getName) for(i <- 0 until numMessages) producer.send(new KeyedMessage[String, Array[Byte]](topic, topic, "test".getBytes)) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000) - // update offset in zookeeper for consumer to jump "forward" in time val dirs = new ZKGroupTopicDirs(group, topic) - var consumerProps = TestUtils.createConsumerProperties(zkConnect, group, testConsumer) + val consumerProps = TestUtils.createConsumerProperties(zkConnect, group, testConsumer) consumerProps.put("auto.offset.reset", resetTo) consumerProps.put("consumer.timeout.ms", "2000") consumerProps.put("fetch.wait.max.ms", "0") diff --git a/core/src/test/scala/unit/kafka/integration/FetcherTest.scala b/core/src/test/scala/unit/kafka/integration/FetcherTest.scala index 47130d33c36fa..25845abbcad2e 100644 --- a/core/src/test/scala/unit/kafka/integration/FetcherTest.scala +++ b/core/src/test/scala/unit/kafka/integration/FetcherTest.scala @@ -30,7 +30,6 @@ import kafka.serializer._ import kafka.producer.{KeyedMessage, Producer} import kafka.utils.TestUtils._ import kafka.utils.TestUtils -import kafka.admin.AdminUtils class FetcherTest extends JUnit3Suite with KafkaServerTestHarness { @@ -55,8 +54,8 @@ class FetcherTest extends JUnit3Suite with KafkaServerTestHarness { override def setUp() { super.setUp - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(0 -> Seq(configs.head.brokerId))) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) + createTopic(zkClient, topic, partitionReplicaAssignment = Map(0 -> Seq(configs.head.brokerId)), servers = servers) + fetcher = new ConsumerFetcherManager("consumer1", new ConsumerConfig(TestUtils.createConsumerProperties("", "", "")), zkClient) fetcher.stopConnections() fetcher.startConnections(topicInfos, cluster) @@ -83,9 +82,9 @@ class FetcherTest extends JUnit3Suite with KafkaServerTestHarness { def sendMessages(messagesPerNode: Int): Int = { var count = 0 for(conf <- configs) { - val producer: Producer[String, Array[Byte]] = TestUtils.createProducer(TestUtils.getBrokerListStrFromConfigs(configs), - new DefaultEncoder(), - new StringEncoder()) + val producer: Producer[String, Array[Byte]] = TestUtils.createProducer( + TestUtils.getBrokerListStrFromConfigs(configs), + keyEncoder = classOf[StringEncoder].getName) val ms = 0.until(messagesPerNode).map(x => (conf.brokerId * 5 + x).toString.getBytes).toArray messages += conf.brokerId -> ms producer.send(ms.map(m => new KeyedMessage[String, Array[Byte]](topic, topic, m)):_*) diff --git a/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala 
b/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala index 194dd70919a5f..3cf7c9bcd6449 100644 --- a/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala +++ b/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala @@ -30,11 +30,13 @@ trait KafkaServerTestHarness extends JUnit3Suite with ZooKeeperTestHarness { val configs: List[KafkaConfig] var servers: List[KafkaServer] = null + var brokerList: String = null override def setUp() { super.setUp if(configs.size <= 0) throw new KafkaException("Must suply at least one server config.") + brokerList = TestUtils.getBrokerListStrFromConfigs(configs) servers = configs.map(TestUtils.createServer(_)) } diff --git a/core/src/test/scala/unit/kafka/integration/LazyInitProducerTest.scala b/core/src/test/scala/unit/kafka/integration/LazyInitProducerTest.scala deleted file mode 100644 index c3c7631659fbd..0000000000000 --- a/core/src/test/scala/unit/kafka/integration/LazyInitProducerTest.scala +++ /dev/null @@ -1,173 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package kafka.integration - -import kafka.api.FetchRequestBuilder -import kafka.message.ByteBufferMessageSet -import kafka.server.{KafkaRequestHandler, KafkaConfig} -import org.apache.log4j.{Level, Logger} -import org.scalatest.junit.JUnit3Suite -import scala.collection._ -import kafka.utils._ -import kafka.common.{ErrorMapping, KafkaException, OffsetOutOfRangeException} -import kafka.producer.KeyedMessage -import org.junit.Assert.assertEquals - -/** - * End to end tests of the primitive apis against a local server - */ -class LazyInitProducerTest extends JUnit3Suite with ProducerConsumerTestHarness { - - val port = TestUtils.choosePort - val props = TestUtils.createBrokerConfig(0, port) - val config = new KafkaConfig(props) - val configs = List(config) - val requestHandlerLogger = Logger.getLogger(classOf[KafkaRequestHandler]) - - override def setUp() { - super.setUp - if(configs.size <= 0) - throw new KafkaException("Must suply at least one server config.") - - // temporarily set request handler logger to a higher level - requestHandlerLogger.setLevel(Level.FATAL) - } - - override def tearDown() { - // restore set request handler logger to a higher level - requestHandlerLogger.setLevel(Level.ERROR) - - super.tearDown - } - - def testProduceAndFetch() { - // send some messages - val topic = "test" - val sentMessages = List("hello", "there") - val producerData = sentMessages.map(m => new KeyedMessage[String, String](topic, topic, m)) - - producer.send(producerData:_*) - - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000) - - var fetchedMessage: ByteBufferMessageSet = null - while(fetchedMessage == null || fetchedMessage.validBytes == 0) { - val fetched = consumer.fetch(new FetchRequestBuilder().addFetch(topic, 0, 0, 10000).build()) - fetchedMessage = fetched.messageSet(topic, 0) - } - assertEquals(sentMessages, fetchedMessage.map(m => Utils.readString(m.message.payload)).toList) - - // send an invalid offset - try { - val fetchedWithError = consumer.fetch(new FetchRequestBuilder().addFetch(topic, 0, -1, 10000).build()) - fetchedWithError.data.values.foreach(pdata => ErrorMapping.maybeThrowException(pdata.error)) - fail("Expected an OffsetOutOfRangeException exception to be thrown") - } catch { - case e: OffsetOutOfRangeException => - } - } - - def testProduceAndMultiFetch() { - // send some messages, with non-ordered topics - val topicOffsets = List(("test4", 0), ("test1", 0), ("test2", 0), ("test3", 0)); - { - val messages = new mutable.HashMap[String, Seq[String]] - val builder = new FetchRequestBuilder() - for( (topic, offset) <- topicOffsets) { - val producedData = List("a_" + topic, "b_" + topic) - messages += topic -> producedData - producer.send(producedData.map(m => new KeyedMessage[String, String](topic, topic, m)):_*) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000) - builder.addFetch(topic, offset, 0, 10000) - } - - // wait a bit for produced message to be available - val request = builder.build() - val response = consumer.fetch(request) - for( (topic, offset) <- topicOffsets) { - val fetched = response.messageSet(topic, offset) - assertEquals(messages(topic), fetched.map(m => Utils.readString(m.message.payload))) - } - } - - // send some invalid offsets - val builder = new FetchRequestBuilder() - for((topic, offset) <- topicOffsets) - builder.addFetch(topic, offset, -1, 10000) - - val request = builder.build() - val responses = consumer.fetch(request) - responses.data.values.foreach(pd => { - try { - 
ErrorMapping.maybeThrowException(pd.error) - fail("Expected an OffsetOutOfRangeException exception to be thrown") - } catch { - case e: OffsetOutOfRangeException => // this is good - } - }) - } - - def testMultiProduce() { - // send some messages - val topics = List("test1", "test2", "test3"); - val messages = new mutable.HashMap[String, Seq[String]] - val builder = new FetchRequestBuilder() - var produceList: List[KeyedMessage[String, String]] = Nil - for(topic <- topics) { - val set = List("a_" + topic, "b_" + topic) - messages += topic -> set - produceList ++= set.map(new KeyedMessage[String, String](topic, topic, _)) - builder.addFetch(topic, 0, 0, 10000) - } - producer.send(produceList: _*) - topics.foreach(topic => TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000)) - - // wait a bit for produced message to be available - val request = builder.build() - val response = consumer.fetch(request) - for(topic <- topics) { - val fetched = response.messageSet(topic, 0) - assertEquals(messages(topic), fetched.map(m => Utils.readString(m.message.payload))) - } - } - - def testMultiProduceResend() { - // send some messages - val topics = List("test1", "test2", "test3"); - val messages = new mutable.HashMap[String, Seq[String]] - val builder = new FetchRequestBuilder() - var produceList: List[KeyedMessage[String, String]] = Nil - for(topic <- topics) { - val set = List("a_" + topic, "b_" + topic) - messages += topic -> set - produceList ++= set.map(new KeyedMessage[String, String](topic, topic, _)) - builder.addFetch(topic, 0, 0, 10000) - } - producer.send(produceList: _*) - topics.foreach(topic => TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000)) - - producer.send(produceList: _*) - // wait a bit for produced message to be available - val request = builder.build() - val response = consumer.fetch(request) - for(topic <- topics) { - val topicMessages = response.messageSet(topic, 0) - assertEquals(messages(topic) ++ messages(topic), topicMessages.map(m => Utils.readString(m.message.payload))) - } - } -} diff --git a/core/src/test/scala/unit/kafka/integration/PrimitiveApiTest.scala b/core/src/test/scala/unit/kafka/integration/PrimitiveApiTest.scala index 5f331d22bc99b..a5386a03b6295 100644 --- a/core/src/test/scala/unit/kafka/integration/PrimitiveApiTest.scala +++ b/core/src/test/scala/unit/kafka/integration/PrimitiveApiTest.scala @@ -21,7 +21,7 @@ import java.nio.ByteBuffer import junit.framework.Assert._ import kafka.api.{PartitionFetchInfo, FetchRequest, FetchRequestBuilder} import kafka.server.{KafkaRequestHandler, KafkaConfig} -import kafka.producer.{KeyedMessage, Producer, ProducerConfig} +import kafka.producer.{KeyedMessage, Producer} import org.apache.log4j.{Level, Logger} import org.I0Itec.zkclient.ZkClient import kafka.zk.ZooKeeperTestHarness @@ -29,18 +29,20 @@ import org.scalatest.junit.JUnit3Suite import scala.collection._ import kafka.admin.AdminUtils import kafka.common.{TopicAndPartition, ErrorMapping, UnknownTopicOrPartitionException, OffsetOutOfRangeException} -import kafka.utils.{TestUtils, Utils} +import kafka.utils.{StaticPartitioner, TestUtils, Utils} +import kafka.serializer.StringEncoder +import java.util.Properties /** * End to end tests of the primitive apis against a local server */ class PrimitiveApiTest extends JUnit3Suite with ProducerConsumerTestHarness with ZooKeeperTestHarness { + val requestHandlerLogger = Logger.getLogger(classOf[KafkaRequestHandler]) - val port = TestUtils.choosePort + val port = TestUtils.choosePort() val props = 
TestUtils.createBrokerConfig(0, port) val config = new KafkaConfig(props) val configs = List(config) - val requestHandlerLogger = Logger.getLogger(classOf[KafkaRequestHandler]) def testFetchRequestCanProperlySerialize() { val request = new FetchRequestBuilder() @@ -67,15 +69,12 @@ class PrimitiveApiTest extends JUnit3Suite with ProducerConsumerTestHarness with def testDefaultEncoderProducerAndFetch() { val topic = "test-topic" - val props = producer.config.props.props - val config = new ProducerConfig(props) - val stringProducer1 = new Producer[String, String](config) - stringProducer1.send(new KeyedMessage[String, String](topic, "test-message")) + producer.send(new KeyedMessage[String, String](topic, "test-message")) val replica = servers.head.replicaManager.getReplica(topic, 0).get assertTrue("HighWatermark should equal logEndOffset with just 1 replica", - replica.logEndOffset > 0 && replica.logEndOffset == replica.highWatermark) + replica.logEndOffset.messageOffset > 0 && replica.logEndOffset.equals(replica.highWatermark)) val request = new FetchRequestBuilder() .clientId("test-client") @@ -93,14 +92,19 @@ class PrimitiveApiTest extends JUnit3Suite with ProducerConsumerTestHarness with def testDefaultEncoderProducerAndFetchWithCompression() { val topic = "test-topic" - val props = producer.config.props.props - props.put("compression", "true") - val config = new ProducerConfig(props) + val props = new Properties() + props.put("compression.codec", "gzip") + + val stringProducer1 = TestUtils.createProducer[String, String]( + TestUtils.getBrokerListStrFromConfigs(configs), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[StringEncoder].getName, + partitioner = classOf[StaticPartitioner].getName, + producerProps = props) - val stringProducer1 = new Producer[String, String](config) stringProducer1.send(new KeyedMessage[String, String](topic, "test-message")) - var fetched = consumer.fetch(new FetchRequestBuilder().addFetch(topic, 0, 0, 10000).build()) + val fetched = consumer.fetch(new FetchRequestBuilder().addFetch(topic, 0, 0, 10000).build()) val messageSet = fetched.messageSet(topic, 0) assertTrue(messageSet.iterator.hasNext) @@ -108,8 +112,8 @@ class PrimitiveApiTest extends JUnit3Suite with ProducerConsumerTestHarness with assertEquals("test-message", Utils.readString(fetchedMessageAndOffset.message.payload, "UTF-8")) } - def testProduceAndMultiFetch() { - createSimpleTopicsAndAwaitLeader(zkClient, List("test1", "test2", "test3", "test4"), config.brokerId) + private def produceAndMultiFetch(producer: Producer[String, String]) { + createSimpleTopicsAndAwaitLeader(zkClient, List("test1", "test2", "test3", "test4")) // send some messages val topics = List(("test4", 0), ("test1", 0), ("test2", 0), ("test3", 0)); @@ -171,139 +175,60 @@ class PrimitiveApiTest extends JUnit3Suite with ProducerConsumerTestHarness with requestHandlerLogger.setLevel(Level.ERROR) } - def testProduceAndMultiFetchWithCompression() { - createSimpleTopicsAndAwaitLeader(zkClient, List("test1", "test2", "test3", "test4"), config.brokerId) - - // send some messages - val topics = List(("test4", 0), ("test1", 0), ("test2", 0), ("test3", 0)); - { - val messages = new mutable.HashMap[String, Seq[String]] - val builder = new FetchRequestBuilder() - for( (topic, partition) <- topics) { - val messageList = List("a_" + topic, "b_" + topic) - val producerData = messageList.map(new KeyedMessage[String, String](topic, topic, _)) - messages += topic -> messageList - producer.send(producerData:_*) - 
builder.addFetch(topic, partition, 0, 10000) - } - - // wait a bit for produced message to be available - val request = builder.build() - val response = consumer.fetch(request) - for( (topic, partition) <- topics) { - val fetched = response.messageSet(topic, partition) - assertEquals(messages(topic), fetched.map(messageAndOffset => Utils.readString(messageAndOffset.message.payload))) - } - } - - // temporarily set request handler logger to a higher level - requestHandlerLogger.setLevel(Level.FATAL) - - { - // send some invalid offsets - val builder = new FetchRequestBuilder() - for( (topic, partition) <- topics) - builder.addFetch(topic, partition, -1, 10000) - - try { - val request = builder.build() - val response = consumer.fetch(request) - response.data.values.foreach(pdata => ErrorMapping.maybeThrowException(pdata.error)) - fail("Expected exception when fetching message with invalid offset") - } catch { - case e: OffsetOutOfRangeException => "this is good" - } - } - - { - // send some invalid partitions - val builder = new FetchRequestBuilder() - for( (topic, _) <- topics) - builder.addFetch(topic, -1, 0, 10000) - - try { - val request = builder.build() - val response = consumer.fetch(request) - response.data.values.foreach(pdata => ErrorMapping.maybeThrowException(pdata.error)) - fail("Expected exception when fetching message with invalid partition") - } catch { - case e: UnknownTopicOrPartitionException => "this is good" - } - } - - // restore set request handler logger to a higher level - requestHandlerLogger.setLevel(Level.ERROR) + def testProduceAndMultiFetch() { + produceAndMultiFetch(producer) } - def testMultiProduce() { - createSimpleTopicsAndAwaitLeader(zkClient, List("test1", "test2", "test3", "test4"), config.brokerId) + private def multiProduce(producer: Producer[String, String]) { + val topics = Map("test4" -> 0, "test1" -> 0, "test2" -> 0, "test3" -> 0) + createSimpleTopicsAndAwaitLeader(zkClient, topics.keys) - // send some messages - val topics = List(("test4", 0), ("test1", 0), ("test2", 0), ("test3", 0)); val messages = new mutable.HashMap[String, Seq[String]] val builder = new FetchRequestBuilder() - var produceList: List[KeyedMessage[String, String]] = Nil - for( (topic, partition) <- topics) { + for((topic, partition) <- topics) { val messageList = List("a_" + topic, "b_" + topic) val producerData = messageList.map(new KeyedMessage[String, String](topic, topic, _)) messages += topic -> messageList producer.send(producerData:_*) builder.addFetch(topic, partition, 0, 10000) } - producer.send(produceList: _*) val request = builder.build() val response = consumer.fetch(request) - for( (topic, partition) <- topics) { + for((topic, partition) <- topics) { val fetched = response.messageSet(topic, partition) assertEquals(messages(topic), fetched.map(messageAndOffset => Utils.readString(messageAndOffset.message.payload))) } } - def testMultiProduceWithCompression() { - // send some messages - val topics = List(("test4", 0), ("test1", 0), ("test2", 0), ("test3", 0)); - val messages = new mutable.HashMap[String, Seq[String]] - val builder = new FetchRequestBuilder() - var produceList: List[KeyedMessage[String, String]] = Nil - for( (topic, partition) <- topics) { - val messageList = List("a_" + topic, "b_" + topic) - val producerData = messageList.map(new KeyedMessage[String, String](topic, topic, _)) - messages += topic -> messageList - producer.send(producerData:_*) - builder.addFetch(topic, partition, 0, 10000) - } - producer.send(produceList: _*) - - // wait a bit for 
produced message to be available - val request = builder.build() - val response = consumer.fetch(request) - for( (topic, partition) <- topics) { - val fetched = response.messageSet(topic, 0) - assertEquals(messages(topic), fetched.map(messageAndOffset => Utils.readString(messageAndOffset.message.payload))) - } + def testMultiProduce() { + multiProduce(producer) } def testConsumerEmptyTopic() { val newTopic = "new-topic" - AdminUtils.createTopic(zkClient, newTopic, 1, 1) - TestUtils.waitUntilMetadataIsPropagated(servers, newTopic, 0, 1000) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, newTopic, 0, 500) + TestUtils.createTopic(zkClient, newTopic, numPartitions = 1, replicationFactor = 1, servers = servers) + val fetchResponse = consumer.fetch(new FetchRequestBuilder().addFetch(newTopic, 0, 0, 10000).build()) assertFalse(fetchResponse.messageSet(newTopic, 0).iterator.hasNext) } def testPipelinedProduceRequests() { - createSimpleTopicsAndAwaitLeader(zkClient, List("test1", "test2", "test3", "test4"), config.brokerId) - val props = producer.config.props.props + val topics = Map("test4" -> 0, "test1" -> 0, "test2" -> 0, "test3" -> 0) + createSimpleTopicsAndAwaitLeader(zkClient, topics.keys) + val props = new Properties() props.put("request.required.acks", "0") - val pipelinedProducer: Producer[String, String] = new Producer(new ProducerConfig(props)) + val pipelinedProducer: Producer[String, String] = + TestUtils.createProducer[String, String]( + TestUtils.getBrokerListStrFromConfigs(configs), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[StringEncoder].getName, + partitioner = classOf[StaticPartitioner].getName, + producerProps = props) // send some messages - val topics = List(("test4", 0), ("test1", 0), ("test2", 0), ("test3", 0)); val messages = new mutable.HashMap[String, Seq[String]] val builder = new FetchRequestBuilder() - var produceList: List[KeyedMessage[String, String]] = Nil for( (topic, partition) <- topics) { val messageList = List("a_" + topic, "b_" + topic) val producerData = messageList.map(new KeyedMessage[String, String](topic, topic, _)) @@ -313,17 +238,24 @@ class PrimitiveApiTest extends JUnit3Suite with ProducerConsumerTestHarness with } // wait until the messages are published - TestUtils.waitUntilTrue(() => { servers.head.logManager.getLog(TopicAndPartition("test1", 0)).get.logEndOffset == 2 }, 1000) - TestUtils.waitUntilTrue(() => { servers.head.logManager.getLog(TopicAndPartition("test2", 0)).get.logEndOffset == 2 }, 1000) - TestUtils.waitUntilTrue(() => { servers.head.logManager.getLog(TopicAndPartition("test3", 0)).get.logEndOffset == 2 }, 1000) - TestUtils.waitUntilTrue(() => { servers.head.logManager.getLog(TopicAndPartition("test4", 0)).get.logEndOffset == 2 }, 1000) + TestUtils.waitUntilTrue(() => { servers.head.logManager.getLog(TopicAndPartition("test1", 0)).get.logEndOffset == 2 }, + "Published messages should be in the log") + TestUtils.waitUntilTrue(() => { servers.head.logManager.getLog(TopicAndPartition("test2", 0)).get.logEndOffset == 2 }, + "Published messages should be in the log") + TestUtils.waitUntilTrue(() => { servers.head.logManager.getLog(TopicAndPartition("test3", 0)).get.logEndOffset == 2 }, + "Published messages should be in the log") + TestUtils.waitUntilTrue(() => { servers.head.logManager.getLog(TopicAndPartition("test4", 0)).get.logEndOffset == 2 }, + "Published messages should be in the log") val replicaId = servers.head.config.brokerId - val hwWaitMs = config.replicaHighWatermarkCheckpointIntervalMs - 
TestUtils.waitUntilTrue(() => { servers.head.replicaManager.getReplica("test1", 0, replicaId).get.highWatermark == 2 }, hwWaitMs) - TestUtils.waitUntilTrue(() => { servers.head.replicaManager.getReplica("test2", 0, replicaId).get.highWatermark == 2 }, hwWaitMs) - TestUtils.waitUntilTrue(() => { servers.head.replicaManager.getReplica("test3", 0, replicaId).get.highWatermark == 2 }, hwWaitMs) - TestUtils.waitUntilTrue(() => { servers.head.replicaManager.getReplica("test4", 0, replicaId).get.highWatermark == 2 }, hwWaitMs) + TestUtils.waitUntilTrue(() => { servers.head.replicaManager.getReplica("test1", 0, replicaId).get.highWatermark.messageOffset == 2 }, + "High watermark should equal to log end offset") + TestUtils.waitUntilTrue(() => { servers.head.replicaManager.getReplica("test2", 0, replicaId).get.highWatermark.messageOffset == 2 }, + "High watermark should equal to log end offset") + TestUtils.waitUntilTrue(() => { servers.head.replicaManager.getReplica("test3", 0, replicaId).get.highWatermark.messageOffset == 2 }, + "High watermark should equal to log end offset") + TestUtils.waitUntilTrue(() => { servers.head.replicaManager.getReplica("test4", 0, replicaId).get.highWatermark.messageOffset == 2 }, + "High watermark should equal to log end offset") // test if the consumer received the messages in the correct order when producer has enabled request pipelining val request = builder.build() @@ -338,10 +270,10 @@ class PrimitiveApiTest extends JUnit3Suite with ProducerConsumerTestHarness with * For testing purposes, just create these topics each with one partition and one replica for * which the provided broker should the leader for. Create and wait for broker to lead. Simple. */ - def createSimpleTopicsAndAwaitLeader(zkClient: ZkClient, topics: Seq[String], brokerId: Int) { + private def createSimpleTopicsAndAwaitLeader(zkClient: ZkClient, topics: Iterable[String]) { for( topic <- topics ) { - AdminUtils.createTopic(zkClient, topic, 1, 1) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) + AdminUtils.createTopic(zkClient, topic, partitions = 1, replicationFactor = 1) + TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, partition = 0) } } } diff --git a/core/src/test/scala/unit/kafka/integration/ProducerConsumerTestHarness.scala b/core/src/test/scala/unit/kafka/integration/ProducerConsumerTestHarness.scala index 731ee59e67e30..108c2e7f47ede 100644 --- a/core/src/test/scala/unit/kafka/integration/ProducerConsumerTestHarness.scala +++ b/core/src/test/scala/unit/kafka/integration/ProducerConsumerTestHarness.scala @@ -19,8 +19,10 @@ package kafka.integration import kafka.consumer.SimpleConsumer import org.scalatest.junit.JUnit3Suite -import kafka.producer.{ProducerConfig, Producer} -import kafka.utils.TestUtils +import kafka.producer.Producer +import kafka.utils.{StaticPartitioner, TestUtils} +import kafka.serializer.StringEncoder + trait ProducerConsumerTestHarness extends JUnit3Suite with KafkaServerTestHarness { val port: Int val host = "localhost" @@ -29,8 +31,10 @@ trait ProducerConsumerTestHarness extends JUnit3Suite with KafkaServerTestHarnes override def setUp() { super.setUp - val props = TestUtils.getProducerConfig(TestUtils.getBrokerListStrFromConfigs(configs), "kafka.utils.StaticPartitioner") - producer = new Producer(new ProducerConfig(props)) + producer = TestUtils.createProducer[String, String](TestUtils.getBrokerListStrFromConfigs(configs), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[StringEncoder].getName, + partitioner = 
classOf[StaticPartitioner].getName) consumer = new SimpleConsumer(host, port, 1000000, 64*1024, "") } diff --git a/core/src/test/scala/unit/kafka/integration/RollingBounceTest.scala b/core/src/test/scala/unit/kafka/integration/RollingBounceTest.scala index b585f0ec0b1c4..eab4b5f619015 100644 --- a/core/src/test/scala/unit/kafka/integration/RollingBounceTest.scala +++ b/core/src/test/scala/unit/kafka/integration/RollingBounceTest.scala @@ -15,18 +15,14 @@ * limitations under the License. */ -package kafka.server +package kafka.integration import org.scalatest.junit.JUnit3Suite import kafka.zk.ZooKeeperTestHarness import kafka.utils.TestUtils._ import junit.framework.Assert._ -import kafka.utils.{ZkUtils, Utils, TestUtils} -import kafka.controller.{ControllerContext, LeaderIsrAndControllerEpoch, ControllerChannelManager} -import kafka.cluster.Broker -import kafka.common.ErrorMapping -import kafka.api._ -import kafka.admin.AdminUtils +import kafka.utils.{Utils, TestUtils} +import kafka.server.{KafkaConfig, KafkaServer} class RollingBounceTest extends JUnit3Suite with ZooKeeperTestHarness { val brokerId1 = 0 @@ -39,15 +35,11 @@ class RollingBounceTest extends JUnit3Suite with ZooKeeperTestHarness { val port3 = TestUtils.choosePort() val port4 = TestUtils.choosePort() - val enableShutdown = true + // controlled.shutdown.enable is true by default val configProps1 = TestUtils.createBrokerConfig(brokerId1, port1) - configProps1.put("controlled.shutdown.enable", "true") val configProps2 = TestUtils.createBrokerConfig(brokerId2, port2) - configProps2.put("controlled.shutdown.enable", "true") val configProps3 = TestUtils.createBrokerConfig(brokerId3, port3) - configProps3.put("controlled.shutdown.enable", "true") val configProps4 = TestUtils.createBrokerConfig(brokerId4, port4) - configProps4.put("controlled.shutdown.enable", "true") configProps4.put("controlled.shutdown.retry.backoff.ms", "100") var servers: Seq[KafkaServer] = Seq.empty[KafkaServer] @@ -79,31 +71,10 @@ class RollingBounceTest extends JUnit3Suite with ZooKeeperTestHarness { val topic4 = "new-topic4" // create topics with 1 partition, 2 replicas, one on each broker - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic1, Map(0->Seq(0,1))) - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic2, Map(0->Seq(1,2))) - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic3, Map(0->Seq(2,3))) - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic4, Map(0->Seq(0,3))) - - // wait until leader is elected - var leader1 = waitUntilLeaderIsElectedOrChanged(zkClient, topic1, partitionId, 500) - var leader2 = waitUntilLeaderIsElectedOrChanged(zkClient, topic2, partitionId, 500) - var leader3 = waitUntilLeaderIsElectedOrChanged(zkClient, topic3, partitionId, 500) - var leader4 = waitUntilLeaderIsElectedOrChanged(zkClient, topic4, partitionId, 500) - - debug("Leader for " + topic1 + " is elected to be: %s".format(leader1.getOrElse(-1))) - debug("Leader for " + topic2 + " is elected to be: %s".format(leader1.getOrElse(-1))) - debug("Leader for " + topic3 + "is elected to be: %s".format(leader1.getOrElse(-1))) - debug("Leader for " + topic4 + "is elected to be: %s".format(leader1.getOrElse(-1))) - - assertTrue("Leader should get elected", leader1.isDefined) - assertTrue("Leader should get elected", leader2.isDefined) - assertTrue("Leader should get elected", leader3.isDefined) - assertTrue("Leader should get elected", leader4.isDefined) - - assertTrue("Leader could be 
broker 0 or broker 1 for " + topic1, (leader1.getOrElse(-1) == 0) || (leader1.getOrElse(-1) == 1)) - assertTrue("Leader could be broker 1 or broker 2 for " + topic2, (leader2.getOrElse(-1) == 1) || (leader1.getOrElse(-1) == 2)) - assertTrue("Leader could be broker 2 or broker 3 for " + topic3, (leader3.getOrElse(-1) == 2) || (leader1.getOrElse(-1) == 3)) - assertTrue("Leader could be broker 3 or broker 4 for " + topic4, (leader4.getOrElse(-1) == 0) || (leader1.getOrElse(-1) == 3)) + createTopic(zkClient, topic1, partitionReplicaAssignment = Map(0->Seq(0,1)), servers = servers) + createTopic(zkClient, topic2, partitionReplicaAssignment = Map(0->Seq(1,2)), servers = servers) + createTopic(zkClient, topic3, partitionReplicaAssignment = Map(0->Seq(2,3)), servers = servers) + createTopic(zkClient, topic4, partitionReplicaAssignment = Map(0->Seq(0,3)), servers = servers) // Do a rolling bounce and check if leader transitions happen correctly @@ -130,10 +101,10 @@ class RollingBounceTest extends JUnit3Suite with ZooKeeperTestHarness { servers((startIndex + 1) % 4).shutdown() prevLeader = (startIndex + 1) % 4 } - var newleader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 1500) + var newleader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId) // Ensure the new leader is different from the old assertTrue("Leader transition did not happen for " + topic, newleader.getOrElse(-1) != -1 && (newleader.getOrElse(-1) != prevLeader)) // Start the server back up again servers(prevLeader).startup() } -} \ No newline at end of file +} diff --git a/core/src/test/scala/unit/kafka/integration/TopicMetadataTest.scala b/core/src/test/scala/unit/kafka/integration/TopicMetadataTest.scala index 9998a1156d41d..35dc071b1056e 100644 --- a/core/src/test/scala/unit/kafka/integration/TopicMetadataTest.scala +++ b/core/src/test/scala/unit/kafka/integration/TopicMetadataTest.scala @@ -65,9 +65,8 @@ class TopicMetadataTest extends JUnit3Suite with ZooKeeperTestHarness { def testBasicTopicMetadata { // create topic val topic = "test" - AdminUtils.createTopic(zkClient, topic, 1, 1) - TestUtils.waitUntilMetadataIsPropagated(Seq(server1), topic, 0, 1000) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 1000) + createTopic(zkClient, topic, numPartitions = 1, replicationFactor = 1, servers = Seq(server1)) + var topicsMetadata = ClientUtils.fetchTopicMetadata(Set(topic),brokers,"TopicMetadataTest-testBasicTopicMetadata", 2000,0).topicsMetadata assertEquals(ErrorMapping.NoError, topicsMetadata.head.errorCode) @@ -84,12 +83,8 @@ class TopicMetadataTest extends JUnit3Suite with ZooKeeperTestHarness { // create topic val topic1 = "testGetAllTopicMetadata1" val topic2 = "testGetAllTopicMetadata2" - AdminUtils.createTopic(zkClient, topic1, 1, 1) - AdminUtils.createTopic(zkClient, topic2, 1, 1) - - // wait for leader to be elected for both topics - TestUtils.waitUntilMetadataIsPropagated(Seq(server1), topic1, 0, 1000) - TestUtils.waitUntilMetadataIsPropagated(Seq(server1), topic2, 0, 1000) + createTopic(zkClient, topic1, numPartitions = 1, replicationFactor = 1, servers = Seq(server1)) + createTopic(zkClient, topic2, numPartitions = 1, replicationFactor = 1, servers = Seq(server1)) // issue metadata request with empty list of topics var topicsMetadata = ClientUtils.fetchTopicMetadata(Set.empty, brokers, "TopicMetadataTest-testGetAllTopicMetadata", @@ -119,8 +114,8 @@ class TopicMetadataTest extends JUnit3Suite with ZooKeeperTestHarness { assertEquals(0, 
topicsMetadata.head.partitionsMetadata.size) // wait for leader to be elected - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 1000) - TestUtils.waitUntilMetadataIsPropagated(Seq(server1), topic, 0, 1000) + TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0) + TestUtils.waitUntilMetadataIsPropagated(Seq(server1), topic, 0) // retry the metadata for the auto created topic topicsMetadata = ClientUtils.fetchTopicMetadata(Set(topic),brokers,"TopicMetadataTest-testBasicTopicMetadata", diff --git a/core/src/test/scala/unit/kafka/integration/UncleanLeaderElectionTest.scala b/core/src/test/scala/unit/kafka/integration/UncleanLeaderElectionTest.scala new file mode 100644 index 0000000000000..ba3bcdcd1de98 --- /dev/null +++ b/core/src/test/scala/unit/kafka/integration/UncleanLeaderElectionTest.scala @@ -0,0 +1,284 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.integration + +import org.apache.kafka.common.config.ConfigException + +import scala.collection.mutable.MutableList +import scala.util.Random +import org.apache.log4j.{Level, Logger} +import org.scalatest.junit.JUnit3Suite +import java.util.Properties +import junit.framework.Assert._ +import kafka.admin.AdminUtils +import kafka.common.FailedToSendMessageException +import kafka.consumer.{Consumer, ConsumerConfig, ConsumerTimeoutException} +import kafka.producer.{KeyedMessage, Producer} +import kafka.serializer.{DefaultEncoder, StringEncoder} +import kafka.server.{KafkaConfig, KafkaServer} +import kafka.utils.Utils +import kafka.utils.TestUtils._ +import kafka.zk.ZooKeeperTestHarness + +class UncleanLeaderElectionTest extends JUnit3Suite with ZooKeeperTestHarness { + val brokerId1 = 0 + val brokerId2 = 1 + + val port1 = choosePort() + val port2 = choosePort() + + // controlled shutdown is needed for these tests, but we can trim the retry count and backoff interval to + // reduce test execution time + val enableControlledShutdown = true + val configProps1 = createBrokerConfig(brokerId1, port1) + val configProps2 = createBrokerConfig(brokerId2, port2) + + for (configProps <- List(configProps1, configProps2)) { + configProps.put("controlled.shutdown.enable", String.valueOf(enableControlledShutdown)) + configProps.put("controlled.shutdown.max.retries", String.valueOf(1)) + configProps.put("controlled.shutdown.retry.backoff.ms", String.valueOf(1000)) + } + + var configs: Seq[KafkaConfig] = Seq.empty[KafkaConfig] + var servers: Seq[KafkaServer] = Seq.empty[KafkaServer] + + val random = new Random() + val topic = "topic" + random.nextLong + val partitionId = 0 + + val kafkaApisLogger = Logger.getLogger(classOf[kafka.server.KafkaApis]) + val networkProcessorLogger = Logger.getLogger(classOf[kafka.network.Processor]) + val syncProducerLogger = 
Logger.getLogger(classOf[kafka.producer.SyncProducer]) + val eventHandlerLogger = Logger.getLogger(classOf[kafka.producer.async.DefaultEventHandler[Object, Object]]) + + override def setUp() { + super.setUp() + + // temporarily set loggers to a higher level so that tests run quietly + kafkaApisLogger.setLevel(Level.FATAL) + networkProcessorLogger.setLevel(Level.FATAL) + syncProducerLogger.setLevel(Level.FATAL) + eventHandlerLogger.setLevel(Level.FATAL) + } + + override def tearDown() { + servers.map(server => shutdownServer(server)) + servers.map(server => Utils.rm(server.config.logDirs)) + + // restore log levels + kafkaApisLogger.setLevel(Level.ERROR) + networkProcessorLogger.setLevel(Level.ERROR) + syncProducerLogger.setLevel(Level.ERROR) + eventHandlerLogger.setLevel(Level.ERROR) + + super.tearDown() + } + + private def startBrokers(cluster: Seq[Properties]) { + for (props <- cluster) { + val config = new KafkaConfig(props) + val server = createServer(config) + configs ++= List(config) + servers ++= List(server) + } + } + + def testUncleanLeaderElectionEnabled { + // unclean leader election is enabled by default + startBrokers(Seq(configProps1, configProps2)) + + // create topic with 1 partition, 2 replicas, one on each broker + AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(partitionId -> Seq(brokerId1, brokerId2))) + + verifyUncleanLeaderElectionEnabled + } + + def testUncleanLeaderElectionDisabled { + // disable unclean leader election + configProps1.put("unclean.leader.election.enable", String.valueOf(false)) + configProps2.put("unclean.leader.election.enable", String.valueOf(false)) + startBrokers(Seq(configProps1, configProps2)) + + // create topic with 1 partition, 2 replicas, one on each broker + AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(partitionId -> Seq(brokerId1, brokerId2))) + + verifyUncleanLeaderElectionDisabled + } + + def testUncleanLeaderElectionEnabledByTopicOverride { + // disable unclean leader election globally, but enable for our specific test topic + configProps1.put("unclean.leader.election.enable", String.valueOf(false)) + configProps2.put("unclean.leader.election.enable", String.valueOf(false)) + startBrokers(Seq(configProps1, configProps2)) + + // create topic with 1 partition, 2 replicas, one on each broker, and unclean leader election enabled + val topicProps = new Properties() + topicProps.put("unclean.leader.election.enable", String.valueOf(true)) + AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(partitionId -> Seq(brokerId1, brokerId2)), + topicProps) + + verifyUncleanLeaderElectionEnabled + } + + def testCleanLeaderElectionDisabledByTopicOverride { + // enable unclean leader election globally, but disable for our specific test topic + configProps1.put("unclean.leader.election.enable", String.valueOf(true)) + configProps2.put("unclean.leader.election.enable", String.valueOf(true)) + startBrokers(Seq(configProps1, configProps2)) + + // create topic with 1 partition, 2 replicas, one on each broker, and unclean leader election disabled + val topicProps = new Properties() + topicProps.put("unclean.leader.election.enable", String.valueOf(false)) + AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(partitionId -> Seq(brokerId1, brokerId2)), + topicProps) + + verifyUncleanLeaderElectionDisabled + } + + def testUncleanLeaderElectionInvalidTopicOverride { + startBrokers(Seq(configProps1)) + + // create topic with an invalid value for 
unclean leader election + val topicProps = new Properties() + topicProps.put("unclean.leader.election.enable", "invalid") + + intercept[ConfigException] { + AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(partitionId -> Seq(brokerId1)), topicProps) + } + } + + def verifyUncleanLeaderElectionEnabled { + // wait until leader is elected + val leaderIdOpt = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId) + assertTrue("Leader should get elected", leaderIdOpt.isDefined) + val leaderId = leaderIdOpt.get + debug("Leader for " + topic + " is elected to be: %s".format(leaderId)) + assertTrue("Leader id is set to expected value for topic: " + topic, leaderId == brokerId1 || leaderId == brokerId2) + + // the non-leader broker is the follower + val followerId = if (leaderId == brokerId1) brokerId2 else brokerId1 + debug("Follower for " + topic + " is: %s".format(followerId)) + + produceMessage(topic, "first") + waitUntilMetadataIsPropagated(servers, topic, partitionId) + assertEquals(List("first"), consumeAllMessages(topic)) + + // shutdown follower server + servers.filter(server => server.config.brokerId == followerId).map(server => shutdownServer(server)) + + produceMessage(topic, "second") + assertEquals(List("first", "second"), consumeAllMessages(topic)) + + // shutdown leader and then restart follower + servers.filter(server => server.config.brokerId == leaderId).map(server => shutdownServer(server)) + servers.filter(server => server.config.brokerId == followerId).map(server => server.startup()) + + // wait until new leader is (uncleanly) elected + waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, newLeaderOpt = Some(followerId)) + + produceMessage(topic, "third") + + // second message was lost due to unclean election + assertEquals(List("first", "third"), consumeAllMessages(topic)) + } + + def verifyUncleanLeaderElectionDisabled { + // wait until leader is elected + val leaderIdOpt = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId) + assertTrue("Leader should get elected", leaderIdOpt.isDefined) + val leaderId = leaderIdOpt.get + debug("Leader for " + topic + " is elected to be: %s".format(leaderId)) + assertTrue("Leader id is set to expected value for topic: " + topic, leaderId == brokerId1 || leaderId == brokerId2) + + // the non-leader broker is the follower + val followerId = if (leaderId == brokerId1) brokerId2 else brokerId1 + debug("Follower for " + topic + " is: %s".format(followerId)) + + produceMessage(topic, "first") + waitUntilMetadataIsPropagated(servers, topic, partitionId) + assertEquals(List("first"), consumeAllMessages(topic)) + + // shutdown follower server + servers.filter(server => server.config.brokerId == followerId).map(server => shutdownServer(server)) + + produceMessage(topic, "second") + assertEquals(List("first", "second"), consumeAllMessages(topic)) + + // shutdown leader and then restart follower + servers.filter(server => server.config.brokerId == leaderId).map(server => shutdownServer(server)) + servers.filter(server => server.config.brokerId == followerId).map(server => server.startup()) + + // verify that unclean election to non-ISR follower does not occur + waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, newLeaderOpt = Some(-1)) + + // message production and consumption should both fail while leader is down + intercept[FailedToSendMessageException] { + produceMessage(topic, "third") + } + assertEquals(List.empty[String], consumeAllMessages(topic)) + + // restart leader 
temporarily to send a successfully replicated message + servers.filter(server => server.config.brokerId == leaderId).map(server => server.startup()) + waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, newLeaderOpt = Some(leaderId)) + + produceMessage(topic, "third") + waitUntilMetadataIsPropagated(servers, topic, partitionId) + servers.filter(server => server.config.brokerId == leaderId).map(server => shutdownServer(server)) + + // verify clean leader transition to ISR follower + waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, newLeaderOpt = Some(followerId)) + + // verify messages can be consumed from ISR follower that was just promoted to leader + assertEquals(List("first", "second", "third"), consumeAllMessages(topic)) + } + + private def shutdownServer(server: KafkaServer) = { + server.shutdown() + server.awaitShutdown() + } + + private def produceMessage(topic: String, message: String) = { + val producer: Producer[String, Array[Byte]] = createProducer( + getBrokerListStrFromConfigs(configs), + keyEncoder = classOf[StringEncoder].getName) + producer.send(new KeyedMessage[String, Array[Byte]](topic, topic, message.getBytes)) + producer.close() + } + + private def consumeAllMessages(topic: String) : List[String] = { + // use a fresh consumer group every time so that we don't need to mess with disabling auto-commit or + // resetting the ZK offset + val consumerProps = createConsumerProperties(zkConnect, "group" + random.nextLong, "id", 1000) + val consumerConnector = Consumer.create(new ConsumerConfig(consumerProps)) + val messageStream = consumerConnector.createMessageStreams(Map(topic -> 1))(topic).head + + val messages = new MutableList[String] + val iter = messageStream.iterator + try { + while(iter.hasNext()) { + messages += new String(iter.next.message) // will throw a timeout exception if the message isn't there + } + } catch { + case e: ConsumerTimeoutException => + debug("consumer timed out after receiving " + messages.length + " message(s).") + } finally { + consumerConnector.shutdown + } + messages.toList + } +} diff --git a/core/src/test/scala/unit/kafka/javaapi/consumer/ZookeeperConsumerConnectorTest.scala b/core/src/test/scala/unit/kafka/javaapi/consumer/ZookeeperConsumerConnectorTest.scala index 43af649f32976..d6248b09bb0f8 100644 --- a/core/src/test/scala/unit/kafka/javaapi/consumer/ZookeeperConsumerConnectorTest.scala +++ b/core/src/test/scala/unit/kafka/javaapi/consumer/ZookeeperConsumerConnectorTest.scala @@ -5,7 +5,7 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -17,21 +17,24 @@ package kafka.javaapi.consumer -import junit.framework.Assert._ -import kafka.integration.KafkaServerTestHarness import kafka.server._ -import org.scalatest.junit.JUnit3Suite -import scala.collection.JavaConversions -import org.apache.log4j.{Level, Logger} import kafka.message._ import kafka.serializer._ +import kafka.integration.KafkaServerTestHarness import kafka.producer.KeyedMessage import kafka.javaapi.producer.Producer import kafka.utils.IntEncoder -import kafka.utils.TestUtils._ import kafka.utils.{Logging, TestUtils} import kafka.consumer.{KafkaStream, ConsumerConfig} import kafka.zk.ZooKeeperTestHarness +import kafka.common.MessageStreamsExistException + +import scala.collection.JavaConversions + +import org.scalatest.junit.JUnit3Suite +import org.apache.log4j.{Level, Logger} +import junit.framework.Assert._ + class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHarness with ZooKeeperTestHarness with Logging { @@ -52,14 +55,13 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar def testBasic() { val requestHandlerLogger = Logger.getLogger(classOf[KafkaRequestHandler]) requestHandlerLogger.setLevel(Level.FATAL) - var actualMessages: List[Message] = Nil + + // create the topic + TestUtils.createTopic(zkClient, topic, numParts, 1, servers) // send some messages to each broker val sentMessages1 = sendMessages(nMessages, "batch1") - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) - waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1, 500) - // create a consumer val consumerConfig1 = new ConsumerConfig(TestUtils.createConsumerProperties(zookeeperConnect, group, consumer1)) val zkConsumerConnector1 = new ZookeeperConsumerConnector(consumerConfig1, true) @@ -68,18 +70,27 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar val receivedMessages1 = getMessages(nMessages*2, topicMessageStreams1) assertEquals(sentMessages1.sorted, receivedMessages1.sorted) + // call createMesssageStreams twice should throw MessageStreamsExistException + try { + val topicMessageStreams2 = zkConsumerConnector1.createMessageStreams(toJavaMap(Map(topic -> numNodes*numParts/2)), new StringDecoder(), new StringDecoder()) + fail("Should fail with MessageStreamsExistException") + } catch { + case e: MessageStreamsExistException => // expected + } zkConsumerConnector1.shutdown info("all consumer connectors stopped") requestHandlerLogger.setLevel(Level.ERROR) } - def sendMessages(conf: KafkaConfig, - messagesPerNode: Int, - header: String, + def sendMessages(conf: KafkaConfig, + messagesPerNode: Int, + header: String, compressed: CompressionCodec): List[String] = { var messages: List[String] = Nil - val producer: kafka.producer.Producer[Int, String] = - TestUtils.createProducer(TestUtils.getBrokerListStrFromConfigs(configs), new StringEncoder(), new IntEncoder()) + val producer: kafka.producer.Producer[Int, String] = + TestUtils.createProducer(TestUtils.getBrokerListStrFromConfigs(configs), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[IntEncoder].getName) val javaProducer: Producer[Int, String] = new kafka.javaapi.producer.Producer(producer) for (partition <- 0 until numParts) { val ms = 0.until(messagesPerNode).map(x => header + conf.brokerId + "-" + partition + "-" + x) @@ -91,8 +102,8 @@ class 
ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar messages } - def sendMessages(messagesPerNode: Int, - header: String, + def sendMessages(messagesPerNode: Int, + header: String, compressed: CompressionCodec = NoCompressionCodec): List[String] = { var messages: List[String] = Nil for(conf <- configs) @@ -100,7 +111,7 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar messages } - def getMessages(nMessagesPerThread: Int, + def getMessages(nMessagesPerThread: Int, jTopicMessageStreams: java.util.Map[String, java.util.List[KafkaStream[String, String]]]): List[String] = { var messages: List[String] = Nil import scala.collection.JavaConversions._ @@ -123,5 +134,5 @@ class ZookeeperConsumerConnectorTest extends JUnit3Suite with KafkaServerTestHar val javaMap = new java.util.HashMap[String, java.lang.Integer]() scalaMap.foreach(m => javaMap.put(m._1, m._2.asInstanceOf[java.lang.Integer])) javaMap - } + } } diff --git a/core/src/test/scala/unit/kafka/log/CleanerTest.scala b/core/src/test/scala/unit/kafka/log/CleanerTest.scala index 51cd94b316764..d10e4f4ccbca5 100644 --- a/core/src/test/scala/unit/kafka/log/CleanerTest.scala +++ b/core/src/test/scala/unit/kafka/log/CleanerTest.scala @@ -33,7 +33,7 @@ import kafka.message._ class CleanerTest extends JUnitSuite { val dir = TestUtils.tempDir() - val logConfig = LogConfig(segmentSize=1024, maxIndexSize=1024, dedupe=true) + val logConfig = LogConfig(segmentSize=1024, maxIndexSize=1024, compact=true) val time = new MockTime() val throttler = new Throttler(desiredRatePerSec = Double.MaxValue, checkIntervalMs = Long.MaxValue, time = time) diff --git a/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala b/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala index 1de3ef0435d76..5bfa764638e92 100644 --- a/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala +++ b/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala @@ -92,7 +92,7 @@ class LogCleanerIntegrationTest extends JUnitSuite { def makeCleaner(parts: Int, minDirtyMessages: Int = 0, numThreads: Int = 1, - defaultPolicy: String = "dedupe", + defaultPolicy: String = "compact", policyOverrides: Map[String, String] = Map()): LogCleaner = { // create partitions and add them to the pool @@ -101,7 +101,7 @@ class LogCleanerIntegrationTest extends JUnitSuite { val dir = new File(logDir, "log-" + i) dir.mkdirs() val log = new Log(dir = dir, - LogConfig(segmentSize = segmentSize, maxIndexSize = 100*1024, fileDeleteDelayMs = deleteDelay, dedupe = true), + LogConfig(segmentSize = segmentSize, maxIndexSize = 100*1024, fileDeleteDelayMs = deleteDelay, compact = true), recoveryPoint = 0L, scheduler = time.scheduler, time = time) diff --git a/core/src/test/scala/unit/kafka/log/LogManagerTest.scala b/core/src/test/scala/unit/kafka/log/LogManagerTest.scala index b4bee33191ebc..90cd53033fafa 100644 --- a/core/src/test/scala/unit/kafka/log/LogManagerTest.scala +++ b/core/src/test/scala/unit/kafka/log/LogManagerTest.scala @@ -21,10 +21,9 @@ import java.io._ import junit.framework.Assert._ import org.junit.Test import org.scalatest.junit.JUnit3Suite -import kafka.server.KafkaConfig +import kafka.server.{BrokerState, OffsetCheckpoint} import kafka.common._ import kafka.utils._ -import kafka.server.OffsetCheckpoint class LogManagerTest extends JUnit3Suite { @@ -36,20 +35,11 @@ class LogManagerTest extends JUnit3Suite { var logManager: LogManager = null val name = "kafka" val veryLargeLogFlushInterval = 10000000L 
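// The CleanerTest and LogCleanerIntegrationTest hunks above switch the log-cleaner flag on
// LogConfig from `dedupe` to `compact`. A minimal sketch of the new spelling, assuming the
// same LogConfig case-class defaults those tests rely on (only the named fields change):
//
//   val compactedConfig = LogConfig(
//     segmentSize  = 1024,   // small segments so the cleaner always has inactive data to clean
//     maxIndexSize = 1024,
//     compact      = true)   // formerly dedupe = true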
- val cleanerConfig = CleanerConfig(enableCleaner = false) override def setUp() { super.setUp() logDir = TestUtils.tempDir() - logManager = new LogManager(logDirs = Array(logDir), - topicConfigs = Map(), - defaultConfig = logConfig, - cleanerConfig = cleanerConfig, - flushCheckMs = 1000L, - flushCheckpointMs = 100000L, - retentionCheckMs = 1000L, - scheduler = time.scheduler, - time = time) + logManager = createLogManager() logManager.startup logDir = logManager.logDirs(0) } @@ -104,7 +94,7 @@ class LogManagerTest extends JUnit3Suite { assertEquals("Now there should only be only one segment in the index.", 1, log.numberOfSegments) time.sleep(log.config.fileDeleteDelayMs + 1) assertEquals("Files should have been deleted", log.numberOfSegments * 2, log.dir.list.length) - assertEquals("Should get empty fetch off new log.", 0, log.read(offset+1, 1024).sizeInBytes) + assertEquals("Should get empty fetch off new log.", 0, log.read(offset+1, 1024).messageSet.sizeInBytes) try { log.read(0, 1024) @@ -124,8 +114,8 @@ class LogManagerTest extends JUnit3Suite { val setSize = TestUtils.singleMessageSet("test".getBytes()).sizeInBytes logManager.shutdown() - val config = logConfig.copy(segmentSize = 10 * (setSize - 1), retentionSize = 5L * 10L * setSize + 10L) - logManager = new LogManager(Array(logDir), Map(), config, cleanerConfig, 1000L, 100000L, 1000L, time.scheduler, time) + val config = logConfig.copy(segmentSize = 10 * setSize, retentionSize = 5L * 10L * setSize + 10L) + logManager = createLogManager() logManager.startup // create a log @@ -147,7 +137,7 @@ class LogManagerTest extends JUnit3Suite { assertEquals("Now there should be exactly 6 segments", 6, log.numberOfSegments) time.sleep(log.config.fileDeleteDelayMs + 1) assertEquals("Files should have been deleted", log.numberOfSegments * 2, log.dir.list.length) - assertEquals("Should get empty fetch off new log.", 0, log.read(offset + 1, 1024).sizeInBytes) + assertEquals("Should get empty fetch off new log.", 0, log.read(offset + 1, 1024).messageSet.sizeInBytes) try { log.read(0, 1024) fail("Should get exception from fetching earlier.") @@ -165,7 +155,7 @@ class LogManagerTest extends JUnit3Suite { def testTimeBasedFlush() { logManager.shutdown() val config = logConfig.copy(flushMs = 1000) - logManager = new LogManager(Array(logDir), Map(), config, cleanerConfig, 1000L, 10000L, 1000L, time.scheduler, time) + logManager = createLogManager() logManager.startup val log = logManager.createLog(TopicAndPartition(name, 0), config) val lastFlush = log.lastFlushTime @@ -187,8 +177,8 @@ class LogManagerTest extends JUnit3Suite { TestUtils.tempDir(), TestUtils.tempDir()) logManager.shutdown() - logManager = new LogManager(dirs, Map(), logConfig, cleanerConfig, 1000L, 10000L, 1000L, time.scheduler, time) - + logManager = createLogManager() + // verify that logs are always assigned to the least loaded partition for(partition <- 0 until 20) { logManager.createLog(TopicAndPartition("test", partition), logConfig) @@ -201,32 +191,77 @@ class LogManagerTest extends JUnit3Suite { /** * Test that it is not possible to open two log managers using the same data directory */ + @Test def testTwoLogManagersUsingSameDirFails() { try { - new LogManager(Array(logDir), Map(), logConfig, cleanerConfig, 1000L, 10000L, 1000L, time.scheduler, time) + createLogManager() fail("Should not be able to create a second log manager instance with the same data directory") } catch { case e: KafkaException => // this is good } } - + /** * Test that recovery points are correctly written 
out to disk */ + @Test def testCheckpointRecoveryPoints() { - val topicA = TopicAndPartition("test-a", 1) - val topicB = TopicAndPartition("test-b", 1) - val logA = this.logManager.createLog(topicA, logConfig) - val logB = this.logManager.createLog(topicB, logConfig) - for(i <- 0 until 50) - logA.append(TestUtils.singleMessageSet("test".getBytes())) - for(i <- 0 until 100) - logB.append(TestUtils.singleMessageSet("test".getBytes())) - logA.flush() - logB.flush() + verifyCheckpointRecovery(Seq(TopicAndPartition("test-a", 1), TopicAndPartition("test-b", 1)), logManager) + } + + /** + * Test that recovery points directory checking works with trailing slash + */ + @Test + def testRecoveryDirectoryMappingWithTrailingSlash() { + logManager.shutdown() + logDir = TestUtils.tempDir() + logManager = TestUtils.createLogManager( + logDirs = Array(new File(logDir.getAbsolutePath + File.separator))) + logManager.startup + verifyCheckpointRecovery(Seq(TopicAndPartition("test-a", 1)), logManager) + } + + /** + * Test that recovery points directory checking works with relative directory + */ + @Test + def testRecoveryDirectoryMappingWithRelativeDirectory() { + logManager.shutdown() + logDir = new File("data" + File.separator + logDir.getName) + logDir.mkdirs() + logDir.deleteOnExit() + logManager = createLogManager() + logManager.startup + verifyCheckpointRecovery(Seq(TopicAndPartition("test-a", 1)), logManager) + } + + + private def verifyCheckpointRecovery(topicAndPartitions: Seq[TopicAndPartition], + logManager: LogManager) { + val logs = topicAndPartitions.map(this.logManager.createLog(_, logConfig)) + logs.foreach(log => { + for(i <- 0 until 50) + log.append(TestUtils.singleMessageSet("test".getBytes())) + + log.flush() + }) + logManager.checkpointRecoveryPointOffsets() val checkpoints = new OffsetCheckpoint(new File(logDir, logManager.RecoveryPointCheckpointFile)).read() - assertEquals("Recovery point should equal checkpoint", checkpoints(topicA), logA.recoveryPoint) - assertEquals("Recovery point should equal checkpoint", checkpoints(topicB), logB.recoveryPoint) + + topicAndPartitions.zip(logs).foreach { + case(tp, log) => { + assertEquals("Recovery point should equal checkpoint", checkpoints(tp), log.recoveryPoint) + } + } + } + + + private def createLogManager(logDirs: Array[File] = Array(this.logDir)): LogManager = { + TestUtils.createLogManager( + defaultConfig = logConfig, + logDirs = logDirs, + time = this.time) } } diff --git a/core/src/test/scala/unit/kafka/log/LogSegmentTest.scala b/core/src/test/scala/unit/kafka/log/LogSegmentTest.scala index 6b7603728ae52..03fb3512c4a44 100644 --- a/core/src/test/scala/unit/kafka/log/LogSegmentTest.scala +++ b/core/src/test/scala/unit/kafka/log/LogSegmentTest.scala @@ -39,7 +39,7 @@ class LogSegmentTest extends JUnit3Suite { val idxFile = TestUtils.tempFile() idxFile.delete() val idx = new OffsetIndex(idxFile, offset, 1000) - val seg = new LogSegment(ms, idx, offset, 10, SystemTime) + val seg = new LogSegment(ms, idx, offset, 10, 0, SystemTime) segments += seg seg } @@ -78,7 +78,7 @@ class LogSegmentTest extends JUnit3Suite { val seg = createSegment(40) val ms = messages(50, "hello", "there", "little", "bee") seg.append(50, ms) - val read = seg.read(startOffset = 41, maxSize = 300, maxOffset = None) + val read = seg.read(startOffset = 41, maxSize = 300, maxOffset = None).messageSet assertEquals(ms.toList, read.toList) } @@ -94,7 +94,7 @@ class LogSegmentTest extends JUnit3Suite { seg.append(baseOffset, ms) def validate(offset: Long) = 
assertEquals(ms.filter(_.offset == offset).toList, - seg.read(startOffset = offset, maxSize = 1024, maxOffset = Some(offset+1)).toList) + seg.read(startOffset = offset, maxSize = 1024, maxOffset = Some(offset+1)).messageSet.toList) validate(50) validate(51) validate(52) @@ -109,7 +109,7 @@ class LogSegmentTest extends JUnit3Suite { val ms = messages(50, "hello", "there") seg.append(50, ms) val read = seg.read(startOffset = 52, maxSize = 200, maxOffset = None) - assertNull("Read beyond the last offset in the segment should give null", null) + assertNull("Read beyond the last offset in the segment should give null", read) } /** @@ -124,7 +124,7 @@ class LogSegmentTest extends JUnit3Suite { val ms2 = messages(60, "alpha", "beta") seg.append(60, ms2) val read = seg.read(startOffset = 55, maxSize = 200, maxOffset = None) - assertEquals(ms2.toList, read.toList) + assertEquals(ms2.toList, read.messageSet.toList) } /** @@ -142,12 +142,12 @@ class LogSegmentTest extends JUnit3Suite { seg.append(offset+1, ms2) // check that we can read back both messages val read = seg.read(offset, None, 10000) - assertEquals(List(ms1.head, ms2.head), read.toList) + assertEquals(List(ms1.head, ms2.head), read.messageSet.toList) // now truncate off the last message seg.truncateTo(offset + 1) val read2 = seg.read(offset, None, 10000) - assertEquals(1, read2.size) - assertEquals(ms1.head, read2.head) + assertEquals(1, read2.messageSet.size) + assertEquals(ms1.head, read2.messageSet.head) offset += 1 } } @@ -204,7 +204,7 @@ class LogSegmentTest extends JUnit3Suite { TestUtils.writeNonsenseToFile(indexFile, 5, indexFile.length.toInt) seg.recover(64*1024) for(i <- 0 until 100) - assertEquals(i, seg.read(i, Some(i+1), 1024).head.offset) + assertEquals(i, seg.read(i, Some(i+1), 1024).messageSet.head.offset) } /** diff --git a/core/src/test/scala/unit/kafka/log/LogTest.scala b/core/src/test/scala/unit/kafka/log/LogTest.scala index 1da1393983d4b..c2dd8eb69da8c 100644 --- a/core/src/test/scala/unit/kafka/log/LogTest.scala +++ b/core/src/test/scala/unit/kafka/log/LogTest.scala @@ -18,15 +18,13 @@ package kafka.log import java.io._ -import java.util.ArrayList import java.util.concurrent.atomic._ import junit.framework.Assert._ import org.scalatest.junit.JUnitSuite import org.junit.{After, Before, Test} import kafka.message._ -import kafka.common.{MessageSizeTooLargeException, OffsetOutOfRangeException} +import kafka.common.{MessageSizeTooLargeException, OffsetOutOfRangeException, MessageSetSizeTooLargeException} import kafka.utils._ -import scala.Some import kafka.server.KafkaConfig class LogTest extends JUnitSuite { @@ -89,6 +87,32 @@ class LogTest extends JUnitSuite { assertEquals("Appending an empty message set should not roll log even if succient time has passed.", numSegments, log.numberOfSegments) } + /** + * Test for jitter s for time based log roll. This test appends messages then changes the time + * using the mock clock to force the log to roll and checks the number of segments. 
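/*
 * The assertions in these hunks switch from treating `log.read(...)` / `seg.read(...)`
 * as a raw message set to reaching the payload through `.messageSet`, i.e. read now
 * returns a small result object. The sketch below shows the general shape of that
 * change with hypothetical types (DemoMessageSet, DemoReadResult, DemoLog); it is not
 * the patch's actual return type.
 */
case class DemoMessageSet(sizeInBytes: Int)

// Wrapping the payload in a result case class leaves room for extra read metadata
// without changing the method signature again later.
case class DemoReadResult(startOffset: Long, messageSet: DemoMessageSet)

class DemoLog {
  def read(offset: Long, maxBytes: Int): DemoReadResult =
    DemoReadResult(offset, DemoMessageSet(sizeInBytes = 0)) // empty fetch past the end
}

object DemoReadUsage {
  def main(args: Array[String]): Unit = {
    val result = new DemoLog().read(offset = 1025L, maxBytes = 1024)
    // Callers now go through .messageSet, mirroring the updated assertions.
    assert(result.messageSet.sizeInBytes == 0)
  }
}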
+ */ + @Test + def testTimeBasedLogRollJitter() { + val set = TestUtils.singleMessageSet("test".getBytes()) + val maxJitter = 20 * 60L + + // create a log + val log = new Log(logDir, + logConfig.copy(segmentMs = 1 * 60 * 60L, segmentJitterMs = maxJitter), + recoveryPoint = 0L, + scheduler = time.scheduler, + time = time) + assertEquals("Log begins with a single empty segment.", 1, log.numberOfSegments) + log.append(set) + + time.sleep(log.config.segmentMs - maxJitter) + log.append(set) + assertEquals("Log does not roll on this append because it occurs earlier than max jitter", 1, log.numberOfSegments); + time.sleep(maxJitter - log.activeSegment.rollJitterMs + 1) + log.append(set) + assertEquals("Log should roll after segmentMs adjusted by random jitter", 2, log.numberOfSegments) + } + /** * Test that appending more than the maximum segment size rolls the log */ @@ -131,11 +155,11 @@ class LogTest extends JUnitSuite { for(i <- 0 until messages.length) log.append(new ByteBufferMessageSet(NoCompressionCodec, messages = messages(i))) for(i <- 0 until messages.length) { - val read = log.read(i, 100, Some(i+1)).head + val read = log.read(i, 100, Some(i+1)).messageSet.head assertEquals("Offset read should match order appended.", i, read.offset) assertEquals("Message should match appended.", messages(i), read.message) } - assertEquals("Reading beyond the last message returns nothing.", 0, log.read(messages.length, 100, None).size) + assertEquals("Reading beyond the last message returns nothing.", 0, log.read(messages.length, 100, None).messageSet.size) } /** @@ -153,7 +177,7 @@ class LogTest extends JUnitSuite { log.append(new ByteBufferMessageSet(NoCompressionCodec, new AtomicLong(messageIds(i)), messages = messages(i)), assignOffsets = false) for(i <- 50 until messageIds.max) { val idx = messageIds.indexWhere(_ >= i) - val read = log.read(i, 100, None).head + val read = log.read(i, 100, None).messageSet.head assertEquals("Offset read should match message id.", messageIds(idx), read.offset) assertEquals("Message should match appended.", messages(idx), read.message) } @@ -176,7 +200,7 @@ class LogTest extends JUnitSuite { // now manually truncate off all but one message from the first segment to create a gap in the messages log.logSegments.head.truncateTo(1) - assertEquals("A read should now return the last message in the log", log.logEndOffset-1, log.read(1, 200, None).head.offset) + assertEquals("A read should now return the last message in the log", log.logEndOffset-1, log.read(1, 200, None).messageSet.head.offset) } /** @@ -188,7 +212,7 @@ class LogTest extends JUnitSuite { def testReadOutOfRange() { createEmptyLogs(logDir, 1024) val log = new Log(logDir, logConfig.copy(segmentSize = 1024), recoveryPoint = 0L, time.scheduler, time = time) - assertEquals("Reading just beyond end of log should produce 0 byte read.", 0, log.read(1024, 1000).sizeInBytes) + assertEquals("Reading just beyond end of log should produce 0 byte read.", 0, log.read(1024, 1000).messageSet.sizeInBytes) try { log.read(0, 1024) fail("Expected exception on invalid read.") @@ -219,12 +243,12 @@ class LogTest extends JUnitSuite { /* do successive reads to ensure all our messages are there */ var offset = 0L for(i <- 0 until numMessages) { - val messages = log.read(offset, 1024*1024) + val messages = log.read(offset, 1024*1024).messageSet assertEquals("Offsets not equal", offset, messages.head.offset) assertEquals("Messages not equal at offset " + offset, messageSets(i).head.message, messages.head.message) offset = 
messages.head.offset + 1 } - val lastRead = log.read(startOffset = numMessages, maxLength = 1024*1024, maxOffset = Some(numMessages + 1)) + val lastRead = log.read(startOffset = numMessages, maxLength = 1024*1024, maxOffset = Some(numMessages + 1)).messageSet assertEquals("Should be no more messages", 0, lastRead.size) // check that rolling the log forced a flushed the log--the flush is asyn so retry in case of failure @@ -239,13 +263,13 @@ class LogTest extends JUnitSuite { @Test def testCompressedMessages() { /* this log should roll after every messageset */ - val log = new Log(logDir, logConfig.copy(segmentSize = 10), recoveryPoint = 0L, time.scheduler, time = time) + val log = new Log(logDir, logConfig.copy(segmentSize = 100), recoveryPoint = 0L, time.scheduler, time = time) /* append 2 compressed message sets, each with two messages giving offsets 0, 1, 2, 3 */ log.append(new ByteBufferMessageSet(DefaultCompressionCodec, new Message("hello".getBytes), new Message("there".getBytes))) log.append(new ByteBufferMessageSet(DefaultCompressionCodec, new Message("alpha".getBytes), new Message("beta".getBytes))) - def read(offset: Int) = ByteBufferMessageSet.decompress(log.read(offset, 4096).head.message) + def read(offset: Int) = ByteBufferMessageSet.decompress(log.read(offset, 4096).messageSet.head.message) /* we should always get the first message in the compressed set when reading any offset in the set */ assertEquals("Read at offset 0 should produce 0", 0, read(0).head.offset) @@ -286,7 +310,26 @@ class LogTest extends JUnitSuite { } /** - * We have a max size limit on message appends, check that it is properly enforced by appending a message larger than the + * MessageSet size shouldn't exceed the config.segmentSize, check that it is properly enforced by + * appending a message set larger than the config.segmentSize setting and checking that an exception is thrown. + */ + @Test + def testMessageSetSizeCheck() { + val messageSet = new ByteBufferMessageSet(NoCompressionCodec, new Message ("You".getBytes), new Message("bethe".getBytes)) + // append messages to log + val configSegmentSize = messageSet.sizeInBytes - 1 + val log = new Log(logDir, logConfig.copy(segmentSize = configSegmentSize), recoveryPoint = 0L, time.scheduler, time = time) + + try { + log.append(messageSet) + fail("message set should throw MessageSetSizeTooLargeException.") + } catch { + case e: MessageSetSizeTooLargeException => // this is good + } + } + + /** + * We have a max size limit on message appends, check that it is properly enforced by appending a message larger than the * setting and checking that an exception is thrown. */ @Test @@ -305,10 +348,9 @@ class LogTest extends JUnitSuite { log.append(second) fail("Second message set should throw MessageSizeTooLargeException.") } catch { - case e: MessageSizeTooLargeException => // this is good + case e: MessageSizeTooLargeException => // this is good } } - /** * Append a bunch of messages to a log and then re-open it both with and without recovery and check that the log re-initializes correctly. 
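/*
 * testMessageSetSizeCheck (added above) and testMessageSizeTooLarge both use the same
 * try / fail / catch-the-specific-exception idiom to assert that an oversized append
 * is rejected. A self-contained sketch of that idiom follows; the exception and log
 * classes here are hypothetical, only the testing pattern mirrors the patch.
 */
class DemoTooLargeException(msg: String) extends RuntimeException(msg)

class DemoSizedLog(maxBytes: Int) {
  def append(payloadBytes: Int): Unit =
    if (payloadBytes > maxBytes)
      throw new DemoTooLargeException(s"$payloadBytes bytes exceeds limit $maxBytes")
}

object DemoSizeCheck {
  def main(args: Array[String]): Unit = {
    val log = new DemoSizedLog(maxBytes = 100)
    try {
      log.append(101)
      sys.error("append should have thrown DemoTooLargeException")
    } catch {
      case _: DemoTooLargeException => // this is good: the limit was enforced
    }
  }
}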
*/ @@ -363,7 +405,7 @@ class LogTest extends JUnitSuite { log = new Log(logDir, config, recoveryPoint = 0L, time.scheduler, time) assertEquals("Should have %d messages when log is reopened".format(numMessages), numMessages, log.logEndOffset) for(i <- 0 until numMessages) - assertEquals(i, log.read(i, 100, None).head.offset) + assertEquals(i, log.read(i, 100, None).messageSet.head.offset) log.close() } @@ -375,7 +417,7 @@ class LogTest extends JUnitSuite { val set = TestUtils.singleMessageSet("test".getBytes()) val setSize = set.sizeInBytes val msgPerSeg = 10 - val segmentSize = msgPerSeg * (setSize - 1) // each segment will be 10 messages + val segmentSize = msgPerSeg * setSize // each segment will be 10 messages // create a log val log = new Log(logDir, logConfig.copy(segmentSize = segmentSize), recoveryPoint = 0L, scheduler = time.scheduler, time = time) @@ -429,7 +471,7 @@ class LogTest extends JUnitSuite { val set = TestUtils.singleMessageSet("test".getBytes()) val setSize = set.sizeInBytes val msgPerSeg = 10 - val segmentSize = msgPerSeg * (setSize - 1) // each segment will be 10 messages + val segmentSize = msgPerSeg * setSize // each segment will be 10 messages val config = logConfig.copy(segmentSize = segmentSize) val log = new Log(logDir, config, recoveryPoint = 0L, scheduler = time.scheduler, time = time) assertEquals("There should be exactly 1 segment.", 1, log.numberOfSegments) @@ -575,15 +617,15 @@ class LogTest extends JUnitSuite { @Test def testAppendMessageWithNullPayload() { - var log = new Log(logDir, + val log = new Log(logDir, LogConfig(), recoveryPoint = 0L, time.scheduler, time) log.append(new ByteBufferMessageSet(new Message(bytes = null))) - val ms = log.read(0, 4096, None) - assertEquals(0, ms.head.offset) - assertTrue("Message payload should be null.", ms.head.message.isNull) + val messageSet = log.read(0, 4096, None).messageSet + assertEquals(0, messageSet.head.offset) + assertTrue("Message payload should be null.", messageSet.head.message.isNull) } @Test @@ -646,4 +688,79 @@ class LogTest extends JUnitSuite { assertEquals(recoveryPoint, log.logEndOffset) cleanShutdownFile.delete() } + + @Test + def testParseTopicPartitionName() { + val topic: String = "test_topic" + val partition:String = "143" + val dir: File = new File(logDir + topicPartitionName(topic, partition)) + val topicAndPartition = Log.parseTopicPartitionName(dir); + assertEquals(topic, topicAndPartition.asTuple._1) + assertEquals(partition.toInt, topicAndPartition.asTuple._2) + } + + @Test + def testParseTopicPartitionNameForEmptyName() { + try { + val dir: File = new File("") + val topicAndPartition = Log.parseTopicPartitionName(dir); + fail("KafkaException should have been thrown for dir: " + dir.getCanonicalPath) + } catch { + case e: Exception => // its GOOD! + } + } + + @Test + def testParseTopicPartitionNameForNull() { + try { + val dir: File = null + val topicAndPartition = Log.parseTopicPartitionName(dir); + fail("KafkaException should have been thrown for dir: " + dir) + } catch { + case e: Exception => // its GOOD! + } + } + + @Test + def testParseTopicPartitionNameForMissingSeparator() { + val topic: String = "test_topic" + val partition:String = "1999" + val dir: File = new File(logDir + File.separator + topic + partition) + try { + val topicAndPartition = Log.parseTopicPartitionName(dir); + fail("KafkaException should have been thrown for dir: " + dir.getCanonicalPath) + } catch { + case e: Exception => // its GOOD! 
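/*
 * The parseTopicPartitionName tests added above (empty name, null, missing separator,
 * missing topic, missing partition) all probe how a "<topic>-<partition>" directory
 * name is split. The sketch below is one way to express that parsing rule; it is
 * illustrative only and is not the Log.parseTopicPartitionName implementation.
 */
import java.io.File

object DemoDirNameParser {
  // Split on the last '-' so topic names that themselves contain dashes still parse.
  def parse(dir: File): (String, Int) = {
    if (dir == null) throw new IllegalArgumentException("directory must not be null")
    val name = dir.getName
    val sep = name.lastIndexOf('-')
    if (sep <= 0 || sep == name.length - 1)
      throw new IllegalArgumentException(s"'$name' is not of the form <topic>-<partition>")
    (name.substring(0, sep), name.substring(sep + 1).toInt)
  }
}

object DemoDirNameParserUsage {
  def main(args: Array[String]): Unit = {
    assert(DemoDirNameParser.parse(new File("/tmp/logs/test_topic-143")) == ("test_topic", 143))
  }
}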
+ } + } + + @Test + def testParseTopicPartitionNameForMissingTopic() { + val topic: String = "" + val partition:String = "1999" + val dir: File = new File(logDir + topicPartitionName(topic, partition)) + try { + val topicAndPartition = Log.parseTopicPartitionName(dir); + fail("KafkaException should have been thrown for dir: " + dir.getCanonicalPath) + } catch { + case e: Exception => // its GOOD! + } + } + + @Test + def testParseTopicPartitionNameForMissingPartition() { + val topic: String = "test_topic" + val partition:String = "" + val dir: File = new File(logDir + topicPartitionName(topic, partition)) + try { + val topicAndPartition = Log.parseTopicPartitionName(dir); + fail("KafkaException should have been thrown for dir: " + dir.getCanonicalPath) + } catch { + case e: Exception => // its GOOD! + } + } + + def topicPartitionName(topic: String, partition: String): String = { + File.separator + topic + "-" + partition + } } diff --git a/core/src/test/scala/unit/kafka/log4j/KafkaLog4jAppenderTest.scala b/core/src/test/scala/unit/kafka/log4j/KafkaLog4jAppenderTest.scala index 67497dd042dfd..4ea0489c9fd36 100644 --- a/core/src/test/scala/unit/kafka/log4j/KafkaLog4jAppenderTest.scala +++ b/core/src/test/scala/unit/kafka/log4j/KafkaLog4jAppenderTest.scala @@ -6,32 +6,35 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ package kafka.log4j -import java.util.Properties -import java.io.File import kafka.consumer.SimpleConsumer import kafka.server.{KafkaConfig, KafkaServer} import kafka.utils.{TestUtils, Utils, Logging} -import junit.framework.Assert._ import kafka.api.FetchRequestBuilder import kafka.producer.async.MissingConfigException import kafka.serializer.Encoder import kafka.zk.ZooKeeperTestHarness + +import java.util.Properties +import java.io.File + import org.apache.log4j.spi.LoggingEvent import org.apache.log4j.{PropertyConfigurator, Logger} import org.junit.{After, Before, Test} import org.scalatest.junit.JUnit3Suite +import junit.framework.Assert._ + class KafkaLog4jAppenderTest extends JUnit3Suite with ZooKeeperTestHarness with Logging { var logDirZk: File = null @@ -55,8 +58,8 @@ class KafkaLog4jAppenderTest extends JUnit3Suite with ZooKeeperTestHarness with val logDirZkPath = propsZk.getProperty("log.dir") logDirZk = new File(logDirZkPath) config = new KafkaConfig(propsZk) - server = TestUtils.createServer(config); - simpleConsumerZk = new SimpleConsumer("localhost", portZk, 1000000, 64*1024, "") + server = TestUtils.createServer(config) + simpleConsumerZk = new SimpleConsumer("localhost", portZk, 1000000, 64 * 1024, "") } @After @@ -69,72 +72,37 @@ class KafkaLog4jAppenderTest extends JUnit3Suite with ZooKeeperTestHarness with @Test def testKafkaLog4jConfigs() { + // host missing var props = new Properties() props.put("log4j.rootLogger", "INFO") props.put("log4j.appender.KAFKA", "kafka.producer.KafkaLog4jAppender") - props.put("log4j.appender.KAFKA.layout","org.apache.log4j.PatternLayout") - props.put("log4j.appender.KAFKA.layout.ConversionPattern","%-5p: %c - %m%n") + props.put("log4j.appender.KAFKA.layout", "org.apache.log4j.PatternLayout") + props.put("log4j.appender.KAFKA.layout.ConversionPattern", "%-5p: %c - %m%n") props.put("log4j.appender.KAFKA.Topic", "test-topic") - props.put("log4j.appender.KAFKA.SerializerClass", "kafka.log4j.AppenderStringEncoder") props.put("log4j.logger.kafka.log4j", "INFO, KAFKA") - // port missing try { PropertyConfigurator.configure(props) fail("Missing properties exception was expected !") - }catch { - case e: MissingConfigException => - } - - props = new Properties() - props.put("log4j.rootLogger", "INFO") - props.put("log4j.appender.KAFKA", "kafka.producer.KafkaLog4jAppender") - props.put("log4j.appender.KAFKA.layout","org.apache.log4j.PatternLayout") - props.put("log4j.appender.KAFKA.layout.ConversionPattern","%-5p: %c - %m%n") - props.put("log4j.appender.KAFKA.Topic", "test-topic") - props.put("log4j.appender.KAFKA.SerializerClass", "kafka.log4j.AppenderStringEncoder") - props.put("log4j.logger.kafka.log4j", "INFO, KAFKA") - - // host missing - try { - PropertyConfigurator.configure(props) - fail("Missing properties exception was expected !") - }catch { + } catch { case e: MissingConfigException => } + // topic missing props = new Properties() props.put("log4j.rootLogger", "INFO") props.put("log4j.appender.KAFKA", "kafka.producer.KafkaLog4jAppender") - props.put("log4j.appender.KAFKA.layout","org.apache.log4j.PatternLayout") - props.put("log4j.appender.KAFKA.layout.ConversionPattern","%-5p: %c - %m%n") - props.put("log4j.appender.KAFKA.SerializerClass", "kafka.log4j.AppenderStringEncoder") + props.put("log4j.appender.KAFKA.layout", "org.apache.log4j.PatternLayout") + props.put("log4j.appender.KAFKA.layout.ConversionPattern", "%-5p: %c - %m%n") props.put("log4j.appender.KAFKA.brokerList", TestUtils.getBrokerListStrFromConfigs(Seq(config))) 
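/*
 * testKafkaLog4jConfigs in this hunk drives the appender with property sets that each
 * omit one required key (first the broker list, then the topic) and expects a
 * MissingConfigException. Below is a self-contained sketch of that validate-required-
 * keys pattern; DemoMissingConfigException and checkRequired are hypothetical names,
 * not the appender's real API.
 */
import java.util.Properties

class DemoMissingConfigException(key: String)
  extends RuntimeException(s"required property '$key' is not set")

object DemoAppenderConfig {
  // Fail fast on the first missing key so a misconfigured appender never starts.
  def checkRequired(props: Properties, keys: Seq[String]): Unit =
    keys.foreach { k =>
      if (props.getProperty(k) == null) throw new DemoMissingConfigException(k)
    }
}

object DemoAppenderConfigUsage {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("Topic", "test-topic") // BrokerList deliberately left out
    try {
      DemoAppenderConfig.checkRequired(props, Seq("BrokerList", "Topic"))
      sys.error("expected a missing-config failure")
    } catch {
      case _: DemoMissingConfigException => // expected: BrokerList is absent
    }
  }
}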
props.put("log4j.logger.kafka.log4j", "INFO, KAFKA") - // topic missing try { PropertyConfigurator.configure(props) fail("Missing properties exception was expected !") - }catch { + } catch { case e: MissingConfigException => } - - props = new Properties() - props.put("log4j.rootLogger", "INFO") - props.put("log4j.appender.KAFKA", "kafka.producer.KafkaLog4jAppender") - props.put("log4j.appender.KAFKA.layout","org.apache.log4j.PatternLayout") - props.put("log4j.appender.KAFKA.layout.ConversionPattern","%-5p: %c - %m%n") - props.put("log4j.appender.KAFKA.brokerList", TestUtils.getBrokerListStrFromConfigs(Seq(config))) - props.put("log4j.appender.KAFKA.Topic", "test-topic") - props.put("log4j.logger.kafka.log4j", "INFO, KAFKA") - - // serializer missing - try { - PropertyConfigurator.configure(props) - }catch { - case e: MissingConfigException => fail("should default to kafka.serializer.StringEncoder") - } } @Test @@ -156,15 +124,16 @@ class KafkaLog4jAppenderTest extends JUnit3Suite with ZooKeeperTestHarness with } private def getLog4jConfig: Properties = { - var props = new Properties() + val props = new Properties() props.put("log4j.rootLogger", "INFO") props.put("log4j.appender.KAFKA", "kafka.producer.KafkaLog4jAppender") - props.put("log4j.appender.KAFKA.layout","org.apache.log4j.PatternLayout") - props.put("log4j.appender.KAFKA.layout.ConversionPattern","%-5p: %c - %m%n") - props.put("log4j.appender.KAFKA.brokerList", TestUtils.getBrokerListStrFromConfigs(Seq(config))) + props.put("log4j.appender.KAFKA.layout", "org.apache.log4j.PatternLayout") + props.put("log4j.appender.KAFKA.layout.ConversionPattern", "%-5p: %c - %m%n") + props.put("log4j.appender.KAFKA.BrokerList", TestUtils.getBrokerListStrFromConfigs(Seq(config))) props.put("log4j.appender.KAFKA.Topic", "test-topic") - props.put("log4j.logger.kafka.log4j", "INFO,KAFKA") - props.put("log4j.appender.KAFKA.requiredNumAcks", "1") + props.put("log4j.appender.KAFKA.RequiredNumAcks", "1") + props.put("log4j.appender.KAFKA.SyncSend", "true") + props.put("log4j.logger.kafka.log4j", "INFO, KAFKA") props } } diff --git a/core/src/test/scala/unit/kafka/message/BaseMessageSetTestCases.scala b/core/src/test/scala/unit/kafka/message/BaseMessageSetTestCases.scala index 6db245c956d21..dd8847f5f709a 100644 --- a/core/src/test/scala/unit/kafka/message/BaseMessageSetTestCases.scala +++ b/core/src/test/scala/unit/kafka/message/BaseMessageSetTestCases.scala @@ -31,7 +31,7 @@ trait BaseMessageSetTestCases extends JUnitSuite { def createMessageSet(messages: Seq[Message]): MessageSet @Test - def testWrittenEqualsRead { + def testWrittenEqualsRead() { val messageSet = createMessageSet(messages) checkEquals(messages.iterator, messageSet.map(m => m.message).iterator) } diff --git a/core/src/test/scala/unit/kafka/message/MessageCompressionTest.scala b/core/src/test/scala/unit/kafka/message/MessageCompressionTest.scala index ed22931f24fed..76987d4fa68fd 100644 --- a/core/src/test/scala/unit/kafka/message/MessageCompressionTest.scala +++ b/core/src/test/scala/unit/kafka/message/MessageCompressionTest.scala @@ -30,6 +30,8 @@ class MessageCompressionTest extends JUnitSuite { val codecs = mutable.ArrayBuffer[CompressionCodec](GZIPCompressionCodec) if(isSnappyAvailable) codecs += SnappyCompressionCodec + if(isLZ4Available) + codecs += LZ4CompressionCodec for(codec <- codecs) testSimpleCompressDecompress(codec) } @@ -42,15 +44,6 @@ class MessageCompressionTest extends JUnitSuite { assertEquals(messages, decompressed) } - @Test - def testComplexCompressDecompress() { 
- val messages = List(new Message("hi there".getBytes), new Message("I am fine".getBytes), new Message("I am not so well today".getBytes)) - val message = new ByteBufferMessageSet(compressionCodec = DefaultCompressionCodec, messages = messages.slice(0, 2):_*) - val complexMessages = List(message.shallowIterator.next.message):::messages.slice(2,3) - val complexMessage = new ByteBufferMessageSet(compressionCodec = DefaultCompressionCodec, messages = complexMessages:_*) - val decompressedMessages = complexMessage.iterator.map(_.message).toList - assertEquals(messages, decompressedMessages) - } def isSnappyAvailable(): Boolean = { try { @@ -61,4 +54,13 @@ class MessageCompressionTest extends JUnitSuite { case e: org.xerial.snappy.SnappyError => false } } + + def isLZ4Available(): Boolean = { + try { + val lz4 = new net.jpountz.lz4.LZ4BlockOutputStream(new ByteArrayOutputStream()) + true + } catch { + case e: UnsatisfiedLinkError => false + } + } } diff --git a/core/src/test/scala/unit/kafka/message/MessageTest.scala b/core/src/test/scala/unit/kafka/message/MessageTest.scala index 4837585d03535..7b74a0d315470 100644 --- a/core/src/test/scala/unit/kafka/message/MessageTest.scala +++ b/core/src/test/scala/unit/kafka/message/MessageTest.scala @@ -39,7 +39,7 @@ class MessageTest extends JUnitSuite { def setUp(): Unit = { val keys = Array(null, "key".getBytes, "".getBytes) val vals = Array("value".getBytes, "".getBytes, null) - val codecs = Array(NoCompressionCodec, GZIPCompressionCodec) + val codecs = Array(NoCompressionCodec, GZIPCompressionCodec, SnappyCompressionCodec, LZ4CompressionCodec) for(k <- keys; v <- vals; codec <- codecs) messages += new MessageTestVal(k, v, codec, new Message(v, k, codec)) } diff --git a/core/src/test/scala/unit/kafka/metrics/MetricsTest.scala b/core/src/test/scala/unit/kafka/metrics/MetricsTest.scala new file mode 100644 index 0000000000000..3cf23b3d6d446 --- /dev/null +++ b/core/src/test/scala/unit/kafka/metrics/MetricsTest.scala @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package kafka.consumer + +import com.yammer.metrics.Metrics +import junit.framework.Assert._ +import kafka.integration.KafkaServerTestHarness +import kafka.server._ +import scala.collection._ +import org.scalatest.junit.JUnit3Suite +import kafka.message._ +import kafka.serializer._ +import kafka.utils._ +import kafka.utils.TestUtils._ + +class MetricsTest extends JUnit3Suite with KafkaServerTestHarness with Logging { + val zookeeperConnect = TestZKUtils.zookeeperConnect + val numNodes = 2 + val numParts = 2 + val topic = "topic1" + val configs = + for (props <- TestUtils.createBrokerConfigs(numNodes)) + yield new KafkaConfig(props) { + override val zkConnect = zookeeperConnect + override val numPartitions = numParts + } + val nMessages = 2 + + override def tearDown() { + super.tearDown() + } + + def testMetricsLeak() { + // create topic topic1 with 1 partition on broker 0 + createTopic(zkClient, topic, numPartitions = 1, replicationFactor = 1, servers = servers) + // force creation not client's specific metrics. + createAndShutdownStep("group0", "consumer0", "producer0") + + val countOfStaticMetrics = Metrics.defaultRegistry().allMetrics().keySet().size + + for (i <- 0 to 5) { + createAndShutdownStep("group" + i % 3, "consumer" + i % 2, "producer" + i % 2) + assertEquals(countOfStaticMetrics, Metrics.defaultRegistry().allMetrics().keySet().size) + } + } + + def createAndShutdownStep(group: String, consumerId: String, producerId: String): Unit = { + val sentMessages1 = sendMessages(configs, topic, producerId, nMessages, "batch1", NoCompressionCodec, 1) + // create a consumer + val consumerConfig1 = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, group, consumerId)) + val zkConsumerConnector1 = new ZookeeperConsumerConnector(consumerConfig1, true) + val topicMessageStreams1 = zkConsumerConnector1.createMessageStreams(Map(topic -> 1), new StringDecoder(), new StringDecoder()) + val receivedMessages1 = getMessages(nMessages, topicMessageStreams1) + + zkConsumerConnector1.shutdown() + } +} \ No newline at end of file diff --git a/core/src/test/scala/unit/kafka/network/SocketServerTest.scala b/core/src/test/scala/unit/kafka/network/SocketServerTest.scala index 4ff6f55914909..5f4d85254c384 100644 --- a/core/src/test/scala/unit/kafka/network/SocketServerTest.scala +++ b/core/src/test/scala/unit/kafka/network/SocketServerTest.scala @@ -5,7 +5,7 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -31,7 +31,6 @@ import kafka.message.ByteBufferMessageSet import java.nio.channels.SelectionKey import kafka.utils.TestUtils - class SocketServerTest extends JUnitSuite { val server: SocketServer = new SocketServer(0, @@ -41,7 +40,9 @@ class SocketServerTest extends JUnitSuite { maxQueuedRequests = 50, sendBufferSize = 300000, recvBufferSize = 300000, - maxRequestSize = 50) + maxRequestSize = 50, + maxConnectionsPerIp = 5, + connectionsMaxIdleMs = 60*1000) server.startup() def sendRequest(socket: Socket, id: Short, request: Array[Byte]) { @@ -52,7 +53,7 @@ class SocketServerTest extends JUnitSuite { outgoing.flush() } - def receiveResponse(socket: Socket): Array[Byte] = { + def receiveResponse(socket: Socket): Array[Byte] = { val incoming = new DataInputStream(socket.getInputStream) val len = incoming.readInt() val response = new Array[Byte](len) @@ -76,7 +77,7 @@ class SocketServerTest extends JUnitSuite { def cleanup() { server.shutdown() } - + @Test def simpleRequest() { val socket = connect() @@ -98,7 +99,7 @@ class SocketServerTest extends JUnitSuite { assertEquals(serializedBytes.toSeq, receiveResponse(socket).toSeq) } - @Test(expected=classOf[IOException]) + @Test(expected = classOf[IOException]) def tooBigRequestIsRejected() { val tooManyBytes = new Array[Byte](server.maxRequestSize + 1) new Random().nextBytes(tooManyBytes) @@ -108,34 +109,51 @@ class SocketServerTest extends JUnitSuite { } @Test - def testSocketSelectionKeyState() { + def testNullResponse() { val socket = connect() - val correlationId = -1 - val clientId = SyncProducerConfig.DefaultClientId - val ackTimeoutMs = SyncProducerConfig.DefaultAckTimeoutMs - val ack: Short = 0 - val emptyRequest = - new ProducerRequest(correlationId, clientId, ack, ackTimeoutMs, collection.mutable.Map[TopicAndPartition, ByteBufferMessageSet]()) - - val byteBuffer = ByteBuffer.allocate(emptyRequest.sizeInBytes) - emptyRequest.writeTo(byteBuffer) - byteBuffer.rewind() - val serializedBytes = new Array[Byte](byteBuffer.remaining) - byteBuffer.get(serializedBytes) - - sendRequest(socket, 0, serializedBytes) + val bytes = new Array[Byte](40) + sendRequest(socket, 0, bytes) val request = server.requestChannel.receiveRequest // Since the response is not sent yet, the selection key should not be readable. - Assert.assertFalse((request.requestKey.asInstanceOf[SelectionKey].interestOps & SelectionKey.OP_READ) == SelectionKey.OP_READ) + TestUtils.waitUntilTrue( + () => { (request.requestKey.asInstanceOf[SelectionKey].interestOps & SelectionKey.OP_READ) != SelectionKey.OP_READ }, + "Socket key shouldn't be available for read") server.requestChannel.sendResponse(new RequestChannel.Response(0, request, null)) // After the response is sent to the client (which is async and may take a bit of time), the socket key should be available for reads. 
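/*
 * The SocketServerTest changes in this hunk replace one-shot assertions on the
 * selection key with TestUtils.waitUntilTrue(condition, message) calls, because the
 * interest-ops flip happens asynchronously after the response is written. Below is a
 * self-contained sketch of such a polling assertion helper; the name demoWaitUntilTrue
 * and its timing constants are assumptions, not the real TestUtils signature.
 */
object DemoWait {
  // Poll the condition until it holds or the deadline passes; fail with the message on timeout.
  def demoWaitUntilTrue(condition: () => Boolean,
                        message: String,
                        timeoutMs: Long = 5000L,
                        pauseMs: Long = 50L): Unit = {
    val deadline = System.currentTimeMillis + timeoutMs
    while (!condition()) {
      if (System.currentTimeMillis > deadline)
        throw new AssertionError(message)
      Thread.sleep(pauseMs)
    }
  }
}

object DemoWaitUsage {
  def main(args: Array[String]): Unit = {
    val ready = new java.util.concurrent.atomic.AtomicBoolean(false)
    new Thread(new Runnable { def run(): Unit = { Thread.sleep(100); ready.set(true) } }).start()
    DemoWait.demoWaitUntilTrue(() => ready.get, "flag should eventually become true")
  }
}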
- Assert.assertTrue( - TestUtils.waitUntilTrue( - () => { (request.requestKey.asInstanceOf[SelectionKey].interestOps & SelectionKey.OP_READ) == SelectionKey.OP_READ }, - 5000) - ) + TestUtils.waitUntilTrue( + () => { (request.requestKey.asInstanceOf[SelectionKey].interestOps & SelectionKey.OP_READ) == SelectionKey.OP_READ }, + "Socket key should be available for reads") + } + + @Test(expected = classOf[IOException]) + def testSocketsCloseOnShutdown() { + // open a connection + val socket = connect() + val bytes = new Array[Byte](40) + // send a request first to make sure the connection has been picked up by the socket server + sendRequest(socket, 0, bytes) + processRequest(server.requestChannel) + // then shutdown the server + server.shutdown() + // doing a subsequent send should throw an exception as the connection should be closed. + sendRequest(socket, 0, bytes) + } + + @Test + def testMaxConnectionsPerIp() { + // make the maximum allowable number of connections and then leak them + val conns = (0 until server.maxConnectionsPerIp).map(i => connect()) + + // now try one more (should fail) + try { + val conn = connect() + sendRequest(conn, 100, "hello".getBytes) + assertEquals(-1, conn.getInputStream().read()) + } catch { + case e: IOException => // this is good + } } } diff --git a/core/src/test/scala/unit/kafka/producer/AsyncProducerTest.scala b/core/src/test/scala/unit/kafka/producer/AsyncProducerTest.scala index 18e355501808c..1db6ac329f7b5 100644 --- a/core/src/test/scala/unit/kafka/producer/AsyncProducerTest.scala +++ b/core/src/test/scala/unit/kafka/producer/AsyncProducerTest.scala @@ -84,15 +84,11 @@ class AsyncProducerTest extends JUnit3Suite { @Test def testProduceAfterClosed() { - val props = new Properties() - props.put("serializer.class", "kafka.serializer.StringEncoder") - props.put("metadata.broker.list", TestUtils.getBrokerListStrFromConfigs(configs)) - props.put("producer.type", "async") - props.put("batch.num.messages", "1") - - val config = new ProducerConfig(props) val produceData = getProduceData(10) - val producer = new Producer[String, String](config) + val producer = createProducer[String, String]( + getBrokerListStrFromConfigs(configs), + encoder = classOf[StringEncoder].getName) + producer.close try { @@ -169,7 +165,7 @@ class AsyncProducerTest extends JUnit3Suite { props.put("metadata.broker.list", TestUtils.getBrokerListStrFromConfigs(configs)) val broker1 = new Broker(0, "localhost", 9092) val broker2 = new Broker(1, "localhost", 9093) - broker1 + // form expected partitions metadata val partition1Metadata = new PartitionMetadata(0, Some(broker1), List(broker1, broker2)) val partition2Metadata = new PartitionMetadata(1, Some(broker2), List(broker1, broker2)) @@ -234,7 +230,13 @@ class AsyncProducerTest extends JUnit3Suite { val serializedData = handler.serialize(produceData) val deserializedData = serializedData.map(d => new KeyedMessage[String,String](d.topic, Utils.readString(d.message.payload))) + + // Test that the serialize handles seq from a Stream + val streamedSerializedData = handler.serialize(Stream(produceData:_*)) + val deserializedStreamData = streamedSerializedData.map(d => new KeyedMessage[String,String](d.topic, Utils.readString(d.message.payload))) + TestUtils.checkEquals(produceData.iterator, deserializedData.iterator) + TestUtils.checkEquals(produceData.iterator, deserializedStreamData.iterator) } @Test @@ -303,10 +305,14 @@ class AsyncProducerTest extends JUnit3Suite { @Test def testIncompatibleEncoder() { val props = new Properties() - 
props.put("metadata.broker.list", TestUtils.getBrokerListStrFromConfigs(configs)) - val config = new ProducerConfig(props) + // no need to retry since the send will always fail + props.put("message.send.max.retries", "0") + val producer= createProducer[String, String]( + brokerList = getBrokerListStrFromConfigs(configs), + encoder = classOf[DefaultEncoder].getName, + keyEncoder = classOf[DefaultEncoder].getName, + producerProps = props) - val producer=new Producer[String, String](config) try { producer.send(getProduceData(1): _*) fail("Should fail with ClassCastException due to incompatible Encoder") @@ -355,42 +361,6 @@ class AsyncProducerTest extends JUnit3Suite { } } - @Test - def testBrokerListAndAsync() { - return - val props = TestUtils.getProducerConfig(TestUtils.getBrokerListStrFromConfigs(configs)) - props.put("producer.type", "async") - props.put("batch.num.messages", "5") - - val config = new ProducerConfig(props) - - val topic = "topic1" - val topic1Metadata = getTopicMetadata(topic, 0, 0, "localhost", 9092) - val topicPartitionInfos = new collection.mutable.HashMap[String, TopicMetadata] - topicPartitionInfos.put("topic1", topic1Metadata) - - val producerPool = new ProducerPool(config) - - val msgs = TestUtils.getMsgStrings(10) - - val handler = new DefaultEventHandler[String,String](config, - partitioner = null.asInstanceOf[Partitioner], - encoder = new StringEncoder, - keyEncoder = new StringEncoder, - producerPool = producerPool, - topicPartitionInfos = topicPartitionInfos) - - val producer = new Producer[String, String](config, handler) - try { - // send all 10 messages, should create 2 batches and 2 syncproducer calls - producer.send(msgs.map(m => new KeyedMessage[String,String](topic, m)): _*) - producer.close - - } catch { - case e: Exception => fail("Not expected", e) - } - } - @Test def testFailedSendRetryLogic() { val props = new Properties() diff --git a/core/src/test/scala/unit/kafka/producer/ProducerTest.scala b/core/src/test/scala/unit/kafka/producer/ProducerTest.scala index 4b2e4ade70f6f..ce65dab4910d9 100644 --- a/core/src/test/scala/unit/kafka/producer/ProducerTest.scala +++ b/core/src/test/scala/unit/kafka/producer/ProducerTest.scala @@ -17,6 +17,7 @@ package kafka.producer +import org.apache.kafka.common.config.ConfigException import org.scalatest.TestFailedException import org.scalatest.junit.JUnit3Suite import kafka.consumer.SimpleConsumer @@ -34,7 +35,7 @@ import org.junit.Assert.assertTrue import org.junit.Assert.assertFalse import org.junit.Assert.assertEquals import kafka.common.{ErrorMapping, FailedToSendMessageException} - +import kafka.serializer.StringEncoder class ProducerTest extends JUnit3Suite with ZooKeeperTestHarness with Logging{ private val brokerId1 = 0 @@ -48,10 +49,10 @@ class ProducerTest extends JUnit3Suite with ZooKeeperTestHarness with Logging{ private val requestHandlerLogger = Logger.getLogger(classOf[KafkaRequestHandler]) private var servers = List.empty[KafkaServer] - private val props1 = TestUtils.createBrokerConfig(brokerId1, port1) + private val props1 = TestUtils.createBrokerConfig(brokerId1, port1, false) props1.put("num.partitions", "4") private val config1 = new KafkaConfig(props1) - private val props2 = TestUtils.createBrokerConfig(brokerId2, port2) + private val props2 = TestUtils.createBrokerConfig(brokerId2, port2, false) props2.put("num.partitions", "4") private val config2 = new KafkaConfig(props2) @@ -76,6 +77,12 @@ class ProducerTest extends JUnit3Suite with ZooKeeperTestHarness with Logging{ override def 
tearDown() { // restore set request handler logger to a higher level requestHandlerLogger.setLevel(Level.ERROR) + + if (consumer1 != null) + consumer1.close() + if (consumer2 != null) + consumer2.close() + server1.shutdown server2.shutdown Utils.rm(server1.config.logDirs) @@ -86,31 +93,32 @@ class ProducerTest extends JUnit3Suite with ZooKeeperTestHarness with Logging{ @Test def testUpdateBrokerPartitionInfo() { val topic = "new-topic" - AdminUtils.createTopic(zkClient, topic, 1, 2) - // wait until the update metadata request for new topic reaches all servers - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 500) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) + TestUtils.createTopic(zkClient, topic, numPartitions = 1, replicationFactor = 2, servers = servers) + + val props = new Properties() + // no need to retry since the send will always fail + props.put("message.send.max.retries", "0") + val producer1 = TestUtils.createProducer[String, String]( + brokerList = "localhost:80,localhost:81", + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[StringEncoder].getName, + producerProps = props) - val props1 = new util.Properties() - props1.put("metadata.broker.list", "localhost:80,localhost:81") - props1.put("serializer.class", "kafka.serializer.StringEncoder") - val producerConfig1 = new ProducerConfig(props1) - val producer1 = new Producer[String, String](producerConfig1) try{ producer1.send(new KeyedMessage[String, String](topic, "test", "test1")) fail("Test should fail because the broker list provided are not valid") } catch { - case e: FailedToSendMessageException => + case e: FailedToSendMessageException => // this is expected case oe: Throwable => fail("fails with exception", oe) } finally { producer1.close() } - val props2 = new util.Properties() - props2.put("metadata.broker.list", "localhost:80," + TestUtils.getBrokerListStrFromConfigs(Seq( config1))) - props2.put("serializer.class", "kafka.serializer.StringEncoder") - val producerConfig2= new ProducerConfig(props2) - val producer2 = new Producer[String, String](producerConfig2) + val producer2 = TestUtils.createProducer[String, String]( + brokerList = "localhost:80," + TestUtils.getBrokerListStrFromConfigs(Seq(config1)), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[StringEncoder].getName) + try{ producer2.send(new KeyedMessage[String, String](topic, "test", "test1")) } catch { @@ -119,11 +127,11 @@ class ProducerTest extends JUnit3Suite with ZooKeeperTestHarness with Logging{ producer2.close() } - val props3 = new util.Properties() - props3.put("metadata.broker.list", TestUtils.getBrokerListStrFromConfigs(Seq(config1, config2))) - props3.put("serializer.class", "kafka.serializer.StringEncoder") - val producerConfig3 = new ProducerConfig(props3) - val producer3 = new Producer[String, String](producerConfig3) + val producer3 = TestUtils.createProducer[String, String]( + brokerList = TestUtils.getBrokerListStrFromConfigs(Seq(config1, config2)), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[StringEncoder].getName) + try{ producer3.send(new KeyedMessage[String, String](topic, "test", "test1")) } catch { @@ -136,28 +144,19 @@ class ProducerTest extends JUnit3Suite with ZooKeeperTestHarness with Logging{ @Test def testSendToNewTopic() { val props1 = new util.Properties() - props1.put("serializer.class", "kafka.serializer.StringEncoder") - props1.put("partitioner.class", "kafka.utils.StaticPartitioner") - props1.put("metadata.broker.list", 
TestUtils.getBrokerListStrFromConfigs(Seq(config1, config2))) - props1.put("request.required.acks", "2") - props1.put("request.timeout.ms", "1000") - - val props2 = new util.Properties() - props2.putAll(props1) - props2.put("request.required.acks", "3") - props2.put("request.timeout.ms", "1000") - - val producerConfig1 = new ProducerConfig(props1) - val producerConfig2 = new ProducerConfig(props2) + props1.put("request.required.acks", "-1") val topic = "new-topic" // create topic with 1 partition and await leadership - AdminUtils.createTopic(zkClient, topic, 1, 2) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) + TestUtils.createTopic(zkClient, topic, numPartitions = 1, replicationFactor = 2, servers = servers) + + val producer1 = TestUtils.createProducer[String, String]( + brokerList = TestUtils.getBrokerListStrFromConfigs(Seq(config1, config2)), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[StringEncoder].getName, + partitioner = classOf[StaticPartitioner].getName, + producerProps = props1) - val producer1 = new Producer[String, String](producerConfig1) - val producer2 = new Producer[String, String](producerConfig2) // Available partition ids should be 0. producer1.send(new KeyedMessage[String, String](topic, "test", "test1")) producer1.send(new KeyedMessage[String, String](topic, "test", "test2")) @@ -178,16 +177,25 @@ class ProducerTest extends JUnit3Suite with ZooKeeperTestHarness with Logging{ assertEquals(new Message(bytes = "test2".getBytes, key = "test".getBytes), messageSet(1).message) producer1.close() + val props2 = new util.Properties() + props2.put("request.required.acks", "3") + // no need to retry since the send will always fail + props2.put("message.send.max.retries", "0") + try { - producer2.send(new KeyedMessage[String, String](topic, "test", "test2")) - fail("Should have timed out for 3 acks.") + val producer2 = TestUtils.createProducer[String, String]( + brokerList = TestUtils.getBrokerListStrFromConfigs(Seq(config1, config2)), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[StringEncoder].getName, + partitioner = classOf[StaticPartitioner].getName, + producerProps = props2) + producer2.close + fail("we don't support request.required.acks greater than 1") } catch { - case se: FailedToSendMessageException => true + case iae: IllegalArgumentException => // this is expected case e: Throwable => fail("Not expected", e) - } - finally { - producer2.close() + } } @@ -195,24 +203,23 @@ class ProducerTest extends JUnit3Suite with ZooKeeperTestHarness with Logging{ @Test def testSendWithDeadBroker() { val props = new Properties() - props.put("serializer.class", "kafka.serializer.StringEncoder") - props.put("partitioner.class", "kafka.utils.StaticPartitioner") - props.put("request.timeout.ms", "2000") props.put("request.required.acks", "1") - props.put("metadata.broker.list", TestUtils.getBrokerListStrFromConfigs(Seq(config1, config2))) + // No need to retry since the topic will be created beforehand and normal send will succeed on the first try. + // Reducing the retries will save the time on the subsequent failure test. 
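/*
 * The ProducerTest hunks around here stop assembling ProducerConfig by hand and pass
 * only the per-test overrides (for example message.send.max.retries=0) to a shared
 * createProducer helper. The sketch below shows the defaults-plus-overrides Properties
 * merge such a helper can perform; the default keys and values chosen here are
 * illustrative, not the helper's actual contents.
 */
import java.util.Properties

object DemoProducerProps {
  // Start from shared defaults, then let the caller's overrides win.
  def build(brokerList: String, overrides: Properties = new Properties()): Properties = {
    val props = new Properties()
    props.put("metadata.broker.list", brokerList)
    props.put("serializer.class", "kafka.serializer.StringEncoder")
    props.put("request.required.acks", "1")
    val names = overrides.stringPropertyNames().iterator()
    while (names.hasNext) {
      val k = names.next()
      props.put(k, overrides.getProperty(k))
    }
    props
  }
}

object DemoProducerPropsUsage {
  def main(args: Array[String]): Unit = {
    val overrides = new Properties()
    overrides.put("message.send.max.retries", "0") // fail fast, as the dead-broker test wants
    val props = DemoProducerProps.build("localhost:9092", overrides)
    assert(props.getProperty("message.send.max.retries") == "0")
    assert(props.getProperty("request.required.acks") == "1")
  }
}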
+ props.put("message.send.max.retries", "0") val topic = "new-topic" // create topic - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(0->Seq(0), 1->Seq(0), 2->Seq(0), 3->Seq(0))) - // waiting for 1 partition is enough - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 1, 500) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 2, 500) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 3, 500) - - val config = new ProducerConfig(props) - val producer = new Producer[String, String](config) + TestUtils.createTopic(zkClient, topic, partitionReplicaAssignment = Map(0->Seq(0), 1->Seq(0), 2->Seq(0), 3->Seq(0)), + servers = servers) + + val producer = TestUtils.createProducer[String, String]( + brokerList = TestUtils.getBrokerListStrFromConfigs(Seq(config1, config2)), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[StringEncoder].getName, + partitioner = classOf[StaticPartitioner].getName, + producerProps = props) + try { // Available partition ids should be 0, 1, 2 and 3, all lead and hosted only // on broker 0 @@ -236,7 +243,8 @@ class ProducerTest extends JUnit3Suite with ZooKeeperTestHarness with Logging{ // restart server 1 server1.startup() - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) + TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0) + TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0) try { // cross check if broker 1 got the messages @@ -255,20 +263,20 @@ class ProducerTest extends JUnit3Suite with ZooKeeperTestHarness with Logging{ def testAsyncSendCanCorrectlyFailWithTimeout() { val timeoutMs = 500 val props = new Properties() - props.put("serializer.class", "kafka.serializer.StringEncoder") - props.put("partitioner.class", "kafka.utils.StaticPartitioner") props.put("request.timeout.ms", String.valueOf(timeoutMs)) - props.put("metadata.broker.list", TestUtils.getBrokerListStrFromConfigs(Seq(config1, config2))) props.put("request.required.acks", "1") + props.put("message.send.max.retries", "0") props.put("client.id","ProducerTest-testAsyncSendCanCorrectlyFailWithTimeout") - val config = new ProducerConfig(props) - val producer = new Producer[String, String](config) + val producer = TestUtils.createProducer[String, String]( + brokerList = TestUtils.getBrokerListStrFromConfigs(Seq(config1, config2)), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[StringEncoder].getName, + partitioner = classOf[StaticPartitioner].getName, + producerProps = props) val topic = "new-topic" // create topics in ZK - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(0->Seq(0,1))) - TestUtils.waitUntilMetadataIsPropagated(servers, topic, 0, 1000) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) + TestUtils.createTopic(zkClient, topic, partitionReplicaAssignment = Map(0->Seq(0,1)), servers = servers) // do a simple test to make sure plumbing is okay try { @@ -300,28 +308,28 @@ class ProducerTest extends JUnit3Suite with ZooKeeperTestHarness with Logging{ } val t2 = SystemTime.milliseconds - // make sure we don't wait fewer than numRetries*timeoutMs milliseconds - // we do this because the DefaultEventHandler retries a number of times - assertTrue((t2-t1) >= timeoutMs*config.messageSendMaxRetries) + // make sure we don't wait fewer than timeoutMs + assertTrue((t2-t1) >= 
timeoutMs) } - + @Test def testSendNullMessage() { - val props = new Properties() - props.put("serializer.class", "kafka.serializer.StringEncoder") - props.put("partitioner.class", "kafka.utils.StaticPartitioner") - props.put("metadata.broker.list", TestUtils.getBrokerListStrFromConfigs(Seq(config1, config2))) - - val config = new ProducerConfig(props) - val producer = new Producer[String, String](config) + val producer = TestUtils.createProducer[String, String]( + brokerList = TestUtils.getBrokerListStrFromConfigs(Seq(config1, config2)), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[StringEncoder].getName, + partitioner = classOf[StaticPartitioner].getName) + try { // create topic AdminUtils.createTopic(zkClient, "new-topic", 2, 1) - assertTrue("Topic new-topic not created after timeout", TestUtils.waitUntilTrue(() => - AdminUtils.fetchTopicMetadataFromZk("new-topic", zkClient).errorCode != ErrorMapping.UnknownTopicOrPartitionCode, zookeeper.tickTime)) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, "new-topic", 0, 500) - + TestUtils.waitUntilTrue(() => + AdminUtils.fetchTopicMetadataFromZk("new-topic", zkClient).errorCode != ErrorMapping.UnknownTopicOrPartitionCode, + "Topic new-topic not created after timeout", + waitTime = zookeeper.tickTime) + TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, "new-topic", 0) + producer.send(new KeyedMessage[String, String]("new-topic", "key", null)) } finally { producer.close() diff --git a/core/src/test/scala/unit/kafka/producer/SyncProducerTest.scala b/core/src/test/scala/unit/kafka/producer/SyncProducerTest.scala index 8d63e312d3eda..d60d8e0f49443 100644 --- a/core/src/test/scala/unit/kafka/producer/SyncProducerTest.scala +++ b/core/src/test/scala/unit/kafka/producer/SyncProducerTest.scala @@ -18,6 +18,7 @@ package kafka.producer import java.net.SocketTimeoutException +import java.util.Properties import junit.framework.Assert import kafka.admin.AdminUtils import kafka.integration.KafkaServerTestHarness @@ -31,7 +32,8 @@ import kafka.common.{TopicAndPartition, ErrorMapping} class SyncProducerTest extends JUnit3Suite with KafkaServerTestHarness { private var messageBytes = new Array[Byte](2); - val configs = List(new KafkaConfig(TestUtils.createBrokerConfigs(1).head)) + // turning off controlled shutdown since testProducerCanTimeout() explicitly shuts down request handler pool. 
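/*
 * testAsyncSendCanCorrectlyFailWithTimeout above now asserts only that the failed send
 * took at least timeoutMs, since retries are disabled and the old
 * timeoutMs * messageSendMaxRetries lower bound no longer applies. A minimal,
 * self-contained sketch of that measure-and-assert-a-lower-bound pattern; slowCall is
 * a stand-in for the real producer send, not the test's actual code.
 */
object DemoTimeoutCheck {
  def main(args: Array[String]): Unit = {
    val timeoutMs = 500L
    def slowCall(): Unit = Thread.sleep(timeoutMs) // stand-in for a send that times out
    val t1 = System.currentTimeMillis
    slowCall()
    val t2 = System.currentTimeMillis
    // With retries off, a single attempt only has to respect one timeout window.
    assert(t2 - t1 >= timeoutMs, s"call returned after ${t2 - t1} ms, expected at least $timeoutMs ms")
  }
}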
+ val configs = List(new KafkaConfig(TestUtils.createBrokerConfigs(1, false).head)) val zookeeperConnect = TestZKUtils.zookeeperConnect @Test @@ -92,8 +94,7 @@ class SyncProducerTest extends JUnit3Suite with KafkaServerTestHarness { val props = TestUtils.getSyncProducerConfig(server.socketServer.port) val producer = new SyncProducer(new SyncProducerConfig(props)) - AdminUtils.createTopic(zkClient, "test", 1, 1) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, "test", 0, 500) + TestUtils.createTopic(zkClient, "test", numPartitions = 1, replicationFactor = 1, servers = servers) val message1 = new Message(new Array[Byte](configs(0).messageMaxBytes + 1)) val messageSet1 = new ByteBufferMessageSet(compressionCodec = NoCompressionCodec, messages = message1) @@ -113,6 +114,7 @@ class SyncProducerTest extends JUnit3Suite with KafkaServerTestHarness { Assert.assertEquals(0, response2.status(TopicAndPartition("test", 0)).offset) } + @Test def testMessageSizeTooLargeWithAckZero() { val server = servers.head @@ -122,7 +124,7 @@ class SyncProducerTest extends JUnit3Suite with KafkaServerTestHarness { val producer = new SyncProducer(new SyncProducerConfig(props)) AdminUtils.createTopic(zkClient, "test", 1, 1) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, "test", 0, 500) + TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, "test", 0) // This message will be dropped silently since message size too large. producer.send(TestUtils.produceRequest("test", 0, @@ -163,9 +165,9 @@ class SyncProducerTest extends JUnit3Suite with KafkaServerTestHarness { // #2 - test that we get correct offsets when partition is owned by broker AdminUtils.createTopic(zkClient, "topic1", 1, 1) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, "topic1", 0, 500) + TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, "topic1", 0) AdminUtils.createTopic(zkClient, "topic3", 1, 1) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, "topic3", 0, 500) + TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, "topic3", 0) val response2 = producer.send(request) Assert.assertNotNull(response2) @@ -225,4 +227,24 @@ class SyncProducerTest extends JUnit3Suite with KafkaServerTestHarness { val response = producer.send(emptyRequest) Assert.assertTrue(response == null) } + + @Test + def testNotEnoughReplicas() { + val topicName = "minisrtest" + val server = servers.head + + val props = TestUtils.getSyncProducerConfig(server.socketServer.port) + props.put("request.required.acks", "-1") + + val producer = new SyncProducer(new SyncProducerConfig(props)) + val topicProps = new Properties() + topicProps.put("min.insync.replicas","2") + AdminUtils.createTopic(zkClient, topicName, 1, 1,topicProps) + TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topicName, 0) + + val response = producer.send(TestUtils.produceRequest(topicName, 0, + new ByteBufferMessageSet(compressionCodec = NoCompressionCodec, messages = new Message(messageBytes)),-1)) + + Assert.assertEquals(ErrorMapping.NotEnoughReplicasCode, response.status(TopicAndPartition(topicName, 0)).error) + } } diff --git a/core/src/test/scala/unit/kafka/server/DelayedOperationTest.scala b/core/src/test/scala/unit/kafka/server/DelayedOperationTest.scala new file mode 100644 index 0000000000000..93f52d3222fc1 --- /dev/null +++ b/core/src/test/scala/unit/kafka/server/DelayedOperationTest.scala @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.server + +import org.junit.Test +import org.scalatest.junit.JUnit3Suite +import junit.framework.Assert._ +import kafka.utils.TestUtils + +class DelayedOperationTest extends JUnit3Suite { + + var purgatory: DelayedOperationPurgatory[MockDelayedOperation] = null + + override def setUp() { + super.setUp() + purgatory = new DelayedOperationPurgatory[MockDelayedOperation](0, 5) + } + + override def tearDown() { + purgatory.shutdown() + super.tearDown() + } + + @Test + def testRequestSatisfaction() { + val r1 = new MockDelayedOperation(100000L) + val r2 = new MockDelayedOperation(100000L) + assertEquals("With no waiting requests, nothing should be satisfied", 0, purgatory.checkAndComplete("test1")) + assertFalse("r1 not satisfied and hence watched", purgatory.tryCompleteElseWatch(r1, Array("test1"))) + assertEquals("Still nothing satisfied", 0, purgatory.checkAndComplete("test1")) + assertFalse("r2 not satisfied and hence watched", purgatory.tryCompleteElseWatch(r2, Array("test2"))) + assertEquals("Still nothing satisfied", 0, purgatory.checkAndComplete("test2")) + r1.completable = true + assertEquals("r1 satisfied", 1, purgatory.checkAndComplete("test1")) + assertEquals("Nothing satisfied", 0, purgatory.checkAndComplete("test1")) + r2.completable = true + assertEquals("r2 satisfied", 1, purgatory.checkAndComplete("test2")) + assertEquals("Nothing satisfied", 0, purgatory.checkAndComplete("test2")) + } + + @Test + def testRequestExpiry() { + val expiration = 20L + val r1 = new MockDelayedOperation(expiration) + val r2 = new MockDelayedOperation(200000L) + val start = System.currentTimeMillis + assertFalse("r1 not satisfied and hence watched", purgatory.tryCompleteElseWatch(r1, Array("test1"))) + assertFalse("r2 not satisfied and hence watched", purgatory.tryCompleteElseWatch(r2, Array("test2"))) + r1.awaitExpiration() + val elapsed = System.currentTimeMillis - start + assertTrue("r1 completed due to expiration", r1.isCompleted()) + assertFalse("r2 hasn't completed", r2.isCompleted()) + assertTrue("Time for expiration %d should at least %d".format(elapsed, expiration), elapsed >= expiration) + } + + @Test + def testRequestPurge() { + val r1 = new MockDelayedOperation(100000L) + val r2 = new MockDelayedOperation(100000L) + purgatory.tryCompleteElseWatch(r1, Array("test1")) + purgatory.tryCompleteElseWatch(r2, Array("test1", "test2")) + purgatory.tryCompleteElseWatch(r1, Array("test2", "test3")) + + assertEquals("Purgatory should have 5 watched elements", 5, purgatory.watched()) + assertEquals("Purgatory should have 3 total delayed operations", 3, purgatory.delayed()) + + // complete one of the operations, it should + // eventually be purged from the watch list with purge interval 5 + r2.completable = true + r2.tryComplete() + TestUtils.waitUntilTrue(() => purgatory.watched() == 3, + "Purgatory 
should have 3 watched elements instead of " + purgatory.watched(), 1000L) + TestUtils.waitUntilTrue(() => purgatory.delayed() == 3, + "Purgatory should still have 3 total delayed operations instead of " + purgatory.delayed(), 1000L) + + // add two more requests, then the satisfied request should be purged from the delayed queue with purge interval 5 + purgatory.tryCompleteElseWatch(r1, Array("test1")) + purgatory.tryCompleteElseWatch(r1, Array("test1")) + + TestUtils.waitUntilTrue(() => purgatory.watched() == 5, + "Purgatory should have 5 watched elements instead of " + purgatory.watched(), 1000L) + TestUtils.waitUntilTrue(() => purgatory.delayed() == 4, + "Purgatory should have 4 total delayed operations instead of " + purgatory.delayed(), 1000L) + } + + class MockDelayedOperation(delayMs: Long) extends DelayedOperation(delayMs) { + var completable = false + + def awaitExpiration() { + synchronized { + wait() + } + } + + override def tryComplete() = { + if (completable) + forceComplete() + else + false + } + + override def onComplete() { + synchronized { + notify() + } + } + } + +} \ No newline at end of file diff --git a/core/src/test/scala/unit/kafka/server/DynamicConfigChangeTest.scala b/core/src/test/scala/unit/kafka/server/DynamicConfigChangeTest.scala new file mode 100644 index 0000000000000..ad121169a5e80 --- /dev/null +++ b/core/src/test/scala/unit/kafka/server/DynamicConfigChangeTest.scala @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package kafka.server + +import junit.framework.Assert._ +import org.junit.Test +import kafka.integration.KafkaServerTestHarness +import kafka.utils._ +import kafka.common._ +import kafka.log.LogConfig +import kafka.admin.{AdminOperationException, AdminUtils} +import org.scalatest.junit.JUnit3Suite + +class DynamicConfigChangeTest extends JUnit3Suite with KafkaServerTestHarness { + + override val configs = List(new KafkaConfig(TestUtils.createBrokerConfig(0, TestUtils.choosePort))) + + @Test + def testConfigChange() { + val oldVal = 100000 + val newVal = 200000 + val tp = TopicAndPartition("test", 0) + AdminUtils.createTopic(zkClient, tp.topic, 1, 1, LogConfig(flushInterval = oldVal).toProps) + TestUtils.retry(10000) { + val logOpt = this.servers(0).logManager.getLog(tp) + assertTrue(logOpt.isDefined) + assertEquals(oldVal, logOpt.get.config.flushInterval) + } + AdminUtils.changeTopicConfig(zkClient, tp.topic, LogConfig(flushInterval = newVal).toProps) + TestUtils.retry(10000) { + assertEquals(newVal, this.servers(0).logManager.getLog(tp).get.config.flushInterval) + } + } + + @Test + def testConfigChangeOnNonExistingTopic() { + val topic = TestUtils.tempTopic + try { + AdminUtils.changeTopicConfig(zkClient, topic, LogConfig(flushInterval = 10000).toProps) + fail("Should fail with AdminOperationException because the topic doesn't exist") + } catch { + case e: AdminOperationException => // expected + } + } + +} \ No newline at end of file diff --git a/core/src/test/scala/unit/kafka/server/HighwatermarkPersistenceTest.scala b/core/src/test/scala/unit/kafka/server/HighwatermarkPersistenceTest.scala index 02c188a412995..8913fc1d59f71 100644 --- a/core/src/test/scala/unit/kafka/server/HighwatermarkPersistenceTest.scala +++ b/core/src/test/scala/unit/kafka/server/HighwatermarkPersistenceTest.scala @@ -32,15 +32,11 @@ class HighwatermarkPersistenceTest extends JUnit3Suite { val configs = TestUtils.createBrokerConfigs(2).map(new KafkaConfig(_)) val topic = "foo" - val logManagers = configs.map(config => new LogManager(logDirs = config.logDirs.map(new File(_)).toArray, - topicConfigs = Map(), - defaultConfig = LogConfig(), - cleanerConfig = CleanerConfig(), - flushCheckMs = 30000, - flushCheckpointMs = 10000L, - retentionCheckMs = 30000, - scheduler = new KafkaScheduler(1), - time = new MockTime)) + val logManagers = configs map { config => + TestUtils.createLogManager( + logDirs = config.logDirs.map(new File(_)).toArray, + cleanerConfig = CleanerConfig()) + } @After def teardown() { @@ -62,7 +58,7 @@ class HighwatermarkPersistenceTest extends JUnit3Suite { replicaManager.checkpointHighWatermarks() var fooPartition0Hw = hwmFor(replicaManager, topic, 0) assertEquals(0L, fooPartition0Hw) - val partition0 = replicaManager.getOrCreatePartition(topic, 0, 1) + val partition0 = replicaManager.getOrCreatePartition(topic, 0) // create leader and follower replicas val log0 = logManagers(0).createLog(TopicAndPartition(topic, 0), LogConfig()) val leaderReplicaPartition0 = new Replica(configs.head.brokerId, partition0, SystemTime, 0, Some(log0)) @@ -71,19 +67,16 @@ class HighwatermarkPersistenceTest extends JUnit3Suite { partition0.addReplicaIfNotExists(followerReplicaPartition0) replicaManager.checkpointHighWatermarks() fooPartition0Hw = hwmFor(replicaManager, topic, 0) - assertEquals(leaderReplicaPartition0.highWatermark, fooPartition0Hw) - try { - followerReplicaPartition0.highWatermark - fail("Should fail with KafkaException") - }catch { - case e: KafkaException => // this is ok - } - // set the highwatermark 
for local replica - partition0.getReplica().get.highWatermark = 5L + assertEquals(leaderReplicaPartition0.highWatermark.messageOffset, fooPartition0Hw) + // set the high watermark for local replica + partition0.getReplica().get.highWatermark = new LogOffsetMetadata(5L) replicaManager.checkpointHighWatermarks() fooPartition0Hw = hwmFor(replicaManager, topic, 0) - assertEquals(leaderReplicaPartition0.highWatermark, fooPartition0Hw) + assertEquals(leaderReplicaPartition0.highWatermark.messageOffset, fooPartition0Hw) EasyMock.verify(zkClient) + + // shutdown the replica manager upon test completion + replicaManager.shutdown(false) } def testHighWatermarkPersistenceMultiplePartitions() { @@ -101,7 +94,7 @@ class HighwatermarkPersistenceTest extends JUnit3Suite { replicaManager.checkpointHighWatermarks() var topic1Partition0Hw = hwmFor(replicaManager, topic1, 0) assertEquals(0L, topic1Partition0Hw) - val topic1Partition0 = replicaManager.getOrCreatePartition(topic1, 0, 1) + val topic1Partition0 = replicaManager.getOrCreatePartition(topic1, 0) // create leader log val topic1Log0 = logManagers(0).createLog(TopicAndPartition(topic1, 0), LogConfig()) // create a local replica for topic1 @@ -109,15 +102,15 @@ class HighwatermarkPersistenceTest extends JUnit3Suite { topic1Partition0.addReplicaIfNotExists(leaderReplicaTopic1Partition0) replicaManager.checkpointHighWatermarks() topic1Partition0Hw = hwmFor(replicaManager, topic1, 0) - assertEquals(leaderReplicaTopic1Partition0.highWatermark, topic1Partition0Hw) - // set the highwatermark for local replica - topic1Partition0.getReplica().get.highWatermark = 5L + assertEquals(leaderReplicaTopic1Partition0.highWatermark.messageOffset, topic1Partition0Hw) + // set the high watermark for local replica + topic1Partition0.getReplica().get.highWatermark = new LogOffsetMetadata(5L) replicaManager.checkpointHighWatermarks() topic1Partition0Hw = hwmFor(replicaManager, topic1, 0) - assertEquals(5L, leaderReplicaTopic1Partition0.highWatermark) + assertEquals(5L, leaderReplicaTopic1Partition0.highWatermark.messageOffset) assertEquals(5L, topic1Partition0Hw) // add another partition and set highwatermark - val topic2Partition0 = replicaManager.getOrCreatePartition(topic2, 0, 1) + val topic2Partition0 = replicaManager.getOrCreatePartition(topic2, 0) // create leader log val topic2Log0 = logManagers(0).createLog(TopicAndPartition(topic2, 0), LogConfig()) // create a local replica for topic2 @@ -125,13 +118,13 @@ class HighwatermarkPersistenceTest extends JUnit3Suite { topic2Partition0.addReplicaIfNotExists(leaderReplicaTopic2Partition0) replicaManager.checkpointHighWatermarks() var topic2Partition0Hw = hwmFor(replicaManager, topic2, 0) - assertEquals(leaderReplicaTopic2Partition0.highWatermark, topic2Partition0Hw) + assertEquals(leaderReplicaTopic2Partition0.highWatermark.messageOffset, topic2Partition0Hw) // set the highwatermark for local replica - topic2Partition0.getReplica().get.highWatermark = 15L - assertEquals(15L, leaderReplicaTopic2Partition0.highWatermark) + topic2Partition0.getReplica().get.highWatermark = new LogOffsetMetadata(15L) + assertEquals(15L, leaderReplicaTopic2Partition0.highWatermark.messageOffset) // change the highwatermark for topic1 - topic1Partition0.getReplica().get.highWatermark = 10L - assertEquals(10L, leaderReplicaTopic1Partition0.highWatermark) + topic1Partition0.getReplica().get.highWatermark = new LogOffsetMetadata(10L) + assertEquals(10L, leaderReplicaTopic1Partition0.highWatermark.messageOffset) 
replicaManager.checkpointHighWatermarks() // verify checkpointed hw for topic 2 topic2Partition0Hw = hwmFor(replicaManager, topic2, 0) @@ -140,10 +133,14 @@ class HighwatermarkPersistenceTest extends JUnit3Suite { topic1Partition0Hw = hwmFor(replicaManager, topic1, 0) assertEquals(10L, topic1Partition0Hw) EasyMock.verify(zkClient) + + // shutdown the replica manager upon test completion + replicaManager.shutdown(false) + } def hwmFor(replicaManager: ReplicaManager, topic: String, partition: Int): Long = { - replicaManager.highWatermarkCheckpoints(replicaManager.config.logDirs(0)).read.getOrElse(TopicAndPartition(topic, partition), 0L) + replicaManager.highWatermarkCheckpoints(new File(replicaManager.config.logDirs(0)).getAbsolutePath).read.getOrElse(TopicAndPartition(topic, partition), 0L) } -} \ No newline at end of file +} diff --git a/core/src/test/scala/unit/kafka/server/ISRExpirationTest.scala b/core/src/test/scala/unit/kafka/server/ISRExpirationTest.scala index 2cd3a3faf7be2..a703d2715048c 100644 --- a/core/src/test/scala/unit/kafka/server/ISRExpirationTest.scala +++ b/core/src/test/scala/unit/kafka/server/ISRExpirationTest.scala @@ -36,8 +36,21 @@ class IsrExpirationTest extends JUnit3Suite { }) val topic = "foo" + val time = new MockTime + + var replicaManager: ReplicaManager = null + + override def setUp() { + super.setUp() + replicaManager = new ReplicaManager(configs.head, time, null, null, null, new AtomicBoolean(false)) + } + + override def tearDown() { + replicaManager.shutdown(false) + super.tearDown() + } + def testIsrExpirationForStuckFollowers() { - val time = new MockTime val log = getLogWithLogEndOffset(15L, 2) // set logEndOffset for leader to 15L // create one partition and all replicas @@ -46,7 +59,7 @@ class IsrExpirationTest extends JUnit3Suite { val leaderReplica = partition0.getReplica(configs.head.brokerId).get // let the follower catch up to 10 - (partition0.assignedReplicas() - leaderReplica).foreach(r => r.logEndOffset = 10) + (partition0.assignedReplicas() - leaderReplica).foreach(r => r.logEndOffset = new LogOffsetMetadata(10L)) var partition0OSR = partition0.getOutOfSyncReplicas(leaderReplica, configs.head.replicaLagTimeMaxMs, configs.head.replicaLagMaxMessages) assertEquals("No replica should be out of sync", Set.empty[Int], partition0OSR.map(_.brokerId)) @@ -61,7 +74,6 @@ class IsrExpirationTest extends JUnit3Suite { } def testIsrExpirationForSlowFollowers() { - val time = new MockTime // create leader replica val log = getLogWithLogEndOffset(15L, 1) // add one partition @@ -69,7 +81,7 @@ class IsrExpirationTest extends JUnit3Suite { assertEquals("All replicas should be in ISR", configs.map(_.brokerId).toSet, partition0.inSyncReplicas.map(_.brokerId)) val leaderReplica = partition0.getReplica(configs.head.brokerId).get // set remote replicas leo to something low, like 4 - (partition0.assignedReplicas() - leaderReplica).foreach(r => r.logEndOffset = 4L) + (partition0.assignedReplicas() - leaderReplica).foreach(r => r.logEndOffset = new LogOffsetMetadata(4L)) // now follower (broker id 1) has caught up to only 4, while the leader is at 15. Since the gap it larger than // replicaMaxLagBytes, the follower is out of sync. 
@@ -82,8 +94,7 @@ class IsrExpirationTest extends JUnit3Suite { private def getPartitionWithAllReplicasInIsr(topic: String, partitionId: Int, time: Time, config: KafkaConfig, localLog: Log): Partition = { val leaderId=config.brokerId - val replicaManager = new ReplicaManager(config, time, null, null, null, new AtomicBoolean(false)) - val partition = replicaManager.getOrCreatePartition(topic, partitionId, 1) + val partition = replicaManager.getOrCreatePartition(topic, partitionId) val leaderReplica = new Replica(leaderId, partition, time, 0, Some(localLog)) val allReplicas = getFollowerReplicas(partition, leaderId, time) :+ leaderReplica @@ -97,7 +108,7 @@ class IsrExpirationTest extends JUnit3Suite { private def getLogWithLogEndOffset(logEndOffset: Long, expectedCalls: Int): Log = { val log1 = EasyMock.createMock(classOf[kafka.log.Log]) - EasyMock.expect(log1.logEndOffset).andReturn(logEndOffset).times(expectedCalls) + EasyMock.expect(log1.logEndOffsetMetadata).andReturn(new LogOffsetMetadata(logEndOffset)).times(expectedCalls) EasyMock.replay(log1) log1 diff --git a/core/src/test/scala/unit/kafka/server/KafkaConfigTest.scala b/core/src/test/scala/unit/kafka/server/KafkaConfigTest.scala index 89c207a3f56c7..2377abe4933e0 100644 --- a/core/src/test/scala/unit/kafka/server/KafkaConfigTest.scala +++ b/core/src/test/scala/unit/kafka/server/KafkaConfigTest.scala @@ -44,6 +44,16 @@ class KafkaConfigTest extends JUnit3Suite { } + @Test + def testLogRetentionTimeMsProvided() { + val props = TestUtils.createBrokerConfig(0, 8181) + props.put("log.retention.ms", "1800000") + + val cfg = new KafkaConfig(props) + assertEquals(30 * 60L * 1000L, cfg.logRetentionTimeMillis) + + } + @Test def testLogRetentionTimeNoConfigProvided() { val props = TestUtils.createBrokerConfig(0, 8181) @@ -63,6 +73,17 @@ class KafkaConfigTest extends JUnit3Suite { assertEquals( 30 * 60L * 1000L, cfg.logRetentionTimeMillis) } + + @Test + def testLogRetentionTimeBothMinutesAndMsProvided() { + val props = TestUtils.createBrokerConfig(0, 8181) + props.put("log.retention.ms", "1800000") + props.put("log.retention.minutes", "10") + + val cfg = new KafkaConfig(props) + assertEquals( 30 * 60L * 1000L, cfg.logRetentionTimeMillis) + + } @Test def testAdvertiseDefaults() { @@ -93,5 +114,72 @@ class KafkaConfigTest extends JUnit3Suite { assertEquals(serverConfig.advertisedHostName, advertisedHostName) assertEquals(serverConfig.advertisedPort, advertisedPort) } + + @Test + def testUncleanLeaderElectionDefault() { + val props = TestUtils.createBrokerConfig(0, 8181) + val serverConfig = new KafkaConfig(props) + + assertEquals(serverConfig.uncleanLeaderElectionEnable, true) + } + + @Test + def testUncleanElectionDisabled() { + val props = TestUtils.createBrokerConfig(0, 8181) + props.put("unclean.leader.election.enable", String.valueOf(false)) + val serverConfig = new KafkaConfig(props) + + assertEquals(serverConfig.uncleanLeaderElectionEnable, false) + } + + @Test + def testUncleanElectionEnabled() { + val props = TestUtils.createBrokerConfig(0, 8181) + props.put("unclean.leader.election.enable", String.valueOf(true)) + val serverConfig = new KafkaConfig(props) + + assertEquals(serverConfig.uncleanLeaderElectionEnable, true) + } + + @Test + def testUncleanElectionInvalid() { + val props = TestUtils.createBrokerConfig(0, 8181) + props.put("unclean.leader.election.enable", "invalid") + + intercept[IllegalArgumentException] { + new KafkaConfig(props) + } + } + + @Test + def testLogRollTimeMsProvided() { + val props = 
TestUtils.createBrokerConfig(0, 8181) + props.put("log.roll.ms", "1800000") + + val cfg = new KafkaConfig(props) + assertEquals(30 * 60L * 1000L, cfg.logRollTimeMillis) + + } + @Test + def testLogRollTimeBothMsAndHoursProvided() { + val props = TestUtils.createBrokerConfig(0, 8181) + props.put("log.roll.ms", "1800000") + props.put("log.roll.hours", "1") + + val cfg = new KafkaConfig(props) + assertEquals( 30 * 60L * 1000L, cfg.logRollTimeMillis) + + } + + @Test + def testLogRollTimeNoConfigProvided() { + val props = TestUtils.createBrokerConfig(0, 8181) + + val cfg = new KafkaConfig(props) + assertEquals(24 * 7 * 60L * 60L * 1000L, cfg.logRollTimeMillis ) + + } + + } diff --git a/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala b/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala index 38e3ae72a87e1..c2ba07c5fdbaf 100644 --- a/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala +++ b/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala @@ -19,7 +19,6 @@ package kafka.server import org.scalatest.junit.JUnit3Suite import kafka.zk.ZooKeeperTestHarness -import kafka.admin.AdminUtils import kafka.utils.TestUtils._ import junit.framework.Assert._ import kafka.utils.{ZkUtils, Utils, TestUtils} @@ -35,8 +34,8 @@ class LeaderElectionTest extends JUnit3Suite with ZooKeeperTestHarness { val port1 = TestUtils.choosePort() val port2 = TestUtils.choosePort() - val configProps1 = TestUtils.createBrokerConfig(brokerId1, port1) - val configProps2 = TestUtils.createBrokerConfig(brokerId2, port2) + val configProps1 = TestUtils.createBrokerConfig(brokerId1, port1, false) + val configProps2 = TestUtils.createBrokerConfig(brokerId2, port2, false) var servers: Seq[KafkaServer] = Seq.empty[KafkaServer] var staleControllerEpochDetected = false @@ -61,10 +60,8 @@ class LeaderElectionTest extends JUnit3Suite with ZooKeeperTestHarness { val partitionId = 0 // create topic with 1 partition, 2 replicas, one on each broker - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(0 -> Seq(0, 1))) + val leader1 = createTopic(zkClient, topic, partitionReplicaAssignment = Map(0 -> Seq(0, 1)), servers = servers)(0) - // wait until leader is elected - val leader1 = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 500) val leaderEpoch1 = ZkUtils.getEpochForPartition(zkClient, topic, partitionId) debug("leader Epoc: " + leaderEpoch1) debug("Leader is elected to be: %s".format(leader1.getOrElse(-1))) @@ -76,8 +73,8 @@ class LeaderElectionTest extends JUnit3Suite with ZooKeeperTestHarness { // kill the server hosting the preferred replica servers.last.shutdown() // check if leader moves to the other server - val leader2 = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 1500, - if(leader1.get == 0) None else leader1) + val leader2 = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, + oldLeaderOpt = if(leader1.get == 0) None else leader1) val leaderEpoch2 = ZkUtils.getEpochForPartition(zkClient, topic, partitionId) debug("Leader is elected to be: %s".format(leader1.getOrElse(-1))) debug("leader Epoc: " + leaderEpoch2) @@ -90,8 +87,8 @@ class LeaderElectionTest extends JUnit3Suite with ZooKeeperTestHarness { servers.last.startup() servers.head.shutdown() Thread.sleep(zookeeper.tickTime) - val leader3 = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 1500, - if(leader2.get == 1) None else leader2) + val leader3 = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, + oldLeaderOpt = 
if(leader2.get == 1) None else leader2) val leaderEpoch3 = ZkUtils.getEpochForPartition(zkClient, topic, partitionId) debug("leader Epoc: " + leaderEpoch3) debug("Leader is elected to be: %s".format(leader3.getOrElse(-1))) @@ -108,10 +105,8 @@ class LeaderElectionTest extends JUnit3Suite with ZooKeeperTestHarness { val partitionId = 0 // create topic with 1 partition, 2 replicas, one on each broker - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(0 -> Seq(0, 1))) + val leader1 = createTopic(zkClient, topic, partitionReplicaAssignment = Map(0 -> Seq(0, 1)), servers = servers)(0) - // wait until leader is elected - val leader1 = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 500) val leaderEpoch1 = ZkUtils.getEpochForPartition(zkClient, topic, partitionId) debug("leader Epoc: " + leaderEpoch1) debug("Leader is elected to be: %s".format(leader1.getOrElse(-1))) @@ -137,7 +132,8 @@ class LeaderElectionTest extends JUnit3Suite with ZooKeeperTestHarness { staleControllerEpoch, 0, "") controllerChannelManager.sendRequest(brokerId2, leaderAndIsrRequest, staleControllerEpochCallback) - TestUtils.waitUntilTrue(() => staleControllerEpochDetected == true, 1000) + TestUtils.waitUntilTrue(() => staleControllerEpochDetected == true, + "Controller epoch should be stale") assertTrue("Stale controller epoch not detected by the broker", staleControllerEpochDetected) controllerChannelManager.shutdown() @@ -150,4 +146,4 @@ class LeaderElectionTest extends JUnit3Suite with ZooKeeperTestHarness { case _ => false } } -} \ No newline at end of file +} diff --git a/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala b/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala index e10228555550d..c06ee756bf0fe 100644 --- a/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala +++ b/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala @@ -22,7 +22,6 @@ import kafka.utils._ import junit.framework.Assert._ import java.util.{Random, Properties} import kafka.consumer.SimpleConsumer -import org.junit.{After, Before, Test} import kafka.message.{NoCompressionCodec, ByteBufferMessageSet, Message} import kafka.zk.ZooKeeperTestHarness import org.scalatest.junit.JUnit3Suite @@ -30,8 +29,6 @@ import kafka.admin.AdminUtils import kafka.api.{PartitionOffsetRequestInfo, FetchRequestBuilder, OffsetRequest} import kafka.utils.TestUtils._ import kafka.common.{ErrorMapping, TopicAndPartition} -import kafka.utils.nonthreadsafe -import kafka.utils.threadsafe import org.junit.After import org.junit.Before import org.junit.Test @@ -85,8 +82,8 @@ class LogOffsetTest extends JUnit3Suite with ZooKeeperTestHarness { AdminUtils.createTopic(zkClient, topic, 1, 1) val logManager = server.getLogManager - assertTrue("Log for partition [topic,0] should be created", - waitUntilTrue(() => logManager.getLog(TopicAndPartition(topic, part)).isDefined, 1000)) + waitUntilTrue(() => logManager.getLog(TopicAndPartition(topic, part)).isDefined, + "Log for partition [topic,0] should be created") val log = logManager.getLog(TopicAndPartition(topic, part)).get val message = new Message(Integer.toString(42).getBytes()) @@ -95,16 +92,16 @@ class LogOffsetTest extends JUnit3Suite with ZooKeeperTestHarness { log.flush() val offsets = server.apis.fetchOffsets(logManager, TopicAndPartition(topic, part), OffsetRequest.LatestTime, 10) - assertEquals(Seq(20L, 16L, 12L, 8L, 4L, 0L), offsets) + assertEquals(Seq(20L, 18L, 15L, 12L, 9L, 6L, 3L, 0), offsets) - waitUntilTrue(() => isLeaderLocalOnBroker(topic, part, 
server), 1000) + waitUntilTrue(() => isLeaderLocalOnBroker(topic, part, server), "Leader should be elected") val topicAndPartition = TopicAndPartition(topic, part) val offsetRequest = OffsetRequest( Map(topicAndPartition -> PartitionOffsetRequestInfo(OffsetRequest.LatestTime, 10)), replicaId = 0) val consumerOffsets = simpleConsumer.getOffsetsBefore(offsetRequest).partitionErrorAndOffsets(topicAndPartition).offsets - assertEquals(Seq(20L, 16L, 12L, 8L, 4L, 0L), consumerOffsets) + assertEquals(Seq(20L, 18L, 15L, 12L, 9L, 6L, 3L, 0), consumerOffsets) // try to fetch using latest offset val fetchResponse = simpleConsumer.fetch( @@ -122,8 +119,7 @@ class LogOffsetTest extends JUnit3Suite with ZooKeeperTestHarness { val topic = topicPartition.split("-").head // setup brokers in zookeeper as owners of partitions for this test - AdminUtils.createTopic(zkClient, topic, 1, 1) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 500) + createTopic(zkClient, topic, numPartitions = 1, replicationFactor = 1, servers = Seq(server)) var offsetChanged = false for(i <- 1 to 14) { @@ -159,14 +155,14 @@ class LogOffsetTest extends JUnit3Suite with ZooKeeperTestHarness { val now = time.milliseconds + 30000 // pretend it is the future to avoid race conditions with the fs val offsets = server.apis.fetchOffsets(logManager, TopicAndPartition(topic, part), now, 10) - assertEquals(Seq(20L, 16L, 12L, 8L, 4L, 0L), offsets) + assertEquals(Seq(20L, 18L, 15L, 12L, 9L, 6L, 3L, 0L), offsets) - waitUntilTrue(() => isLeaderLocalOnBroker(topic, part, server), 1000) + waitUntilTrue(() => isLeaderLocalOnBroker(topic, part, server), "Leader should be elected") val topicAndPartition = TopicAndPartition(topic, part) val offsetRequest = OffsetRequest(Map(topicAndPartition -> PartitionOffsetRequestInfo(now, 10)), replicaId = 0) val consumerOffsets = simpleConsumer.getOffsetsBefore(offsetRequest).partitionErrorAndOffsets(topicAndPartition).offsets - assertEquals(Seq(20L, 16L, 12L, 8L, 4L, 0L), consumerOffsets) + assertEquals(Seq(20L, 18L, 15L, 12L, 9L, 6L, 3L, 0L), consumerOffsets) } @Test @@ -189,7 +185,7 @@ class LogOffsetTest extends JUnit3Suite with ZooKeeperTestHarness { assertEquals(Seq(0L), offsets) - waitUntilTrue(() => isLeaderLocalOnBroker(topic, part, server), 1000) + waitUntilTrue(() => isLeaderLocalOnBroker(topic, part, server), "Leader should be elected") val topicAndPartition = TopicAndPartition(topic, part) val offsetRequest = OffsetRequest(Map(topicAndPartition -> PartitionOffsetRequestInfo(OffsetRequest.EarliestTime, 10))) diff --git a/core/src/test/scala/unit/kafka/server/LogRecoveryTest.scala b/core/src/test/scala/unit/kafka/server/LogRecoveryTest.scala index 17a99f182f64c..d5d351c4f2593 100644 --- a/core/src/test/scala/unit/kafka/server/LogRecoveryTest.scala +++ b/core/src/test/scala/unit/kafka/server/LogRecoveryTest.scala @@ -16,20 +16,22 @@ */ package kafka.server -import org.scalatest.junit.JUnit3Suite -import org.junit.Assert._ -import java.io.File -import kafka.admin.AdminUtils import kafka.utils.TestUtils._ import kafka.utils.IntEncoder import kafka.utils.{Utils, TestUtils} import kafka.zk.ZooKeeperTestHarness import kafka.common._ -import kafka.producer.{ProducerConfig, KeyedMessage, Producer} +import kafka.producer.{KeyedMessage, Producer} +import kafka.serializer.StringEncoder + +import java.io.File + +import org.scalatest.junit.JUnit3Suite +import org.junit.Assert._ class LogRecoveryTest extends JUnit3Suite with ZooKeeperTestHarness { - val configs = 
TestUtils.createBrokerConfigs(2).map(new KafkaConfig(_) { + val configs = TestUtils.createBrokerConfigs(2, false).map(new KafkaConfig(_) { override val replicaLagTimeMaxMs = 5000L override val replicaLagMaxMessages = 10L override val replicaFetchWaitMaxMs = 1000 @@ -50,46 +52,43 @@ class LogRecoveryTest extends JUnit3Suite with ZooKeeperTestHarness { var hwFile1: OffsetCheckpoint = new OffsetCheckpoint(new File(configProps1.logDirs(0), ReplicaManager.HighWatermarkFilename)) var hwFile2: OffsetCheckpoint = new OffsetCheckpoint(new File(configProps2.logDirs(0), ReplicaManager.HighWatermarkFilename)) var servers: Seq[KafkaServer] = Seq.empty[KafkaServer] - - val producerProps = getProducerConfig(TestUtils.getBrokerListStrFromConfigs(configs)) - producerProps.put("key.serializer.class", classOf[IntEncoder].getName.toString) - producerProps.put("request.required.acks", "-1") - - override def tearDown() { - super.tearDown() - for(server <- servers) { - server.shutdown() - Utils.rm(server.config.logDirs(0)) - } - } - def testHWCheckpointNoFailuresSingleLogSegment { + override def setUp() { + super.setUp() + // start both servers server1 = TestUtils.createServer(configProps1) server2 = TestUtils.createServer(configProps2) servers ++= List(server1, server2) - producer = new Producer[Int, String](new ProducerConfig(producerProps)) - // create topic with 1 partition, 2 replicas, one on each broker - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(0->Seq(0,1))) + createTopic(zkClient, topic, partitionReplicaAssignment = Map(0->Seq(0,1)), servers = servers) - // wait until leader is elected - var leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 500) - assertTrue("Leader should get elected", leader.isDefined) - // NOTE: this is to avoid transient test failures - assertTrue("Leader could be broker 0 or broker 1", (leader.getOrElse(-1) == 0) || (leader.getOrElse(-1) == 1)) + // create the producer + producer = TestUtils.createProducer[Int, String](TestUtils.getBrokerListStrFromConfigs(configs), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[IntEncoder].getName) + } + + override def tearDown() { + producer.close() + for(server <- servers) { + server.shutdown() + Utils.rm(server.config.logDirs(0)) + } + super.tearDown() + } + def testHWCheckpointNoFailuresSingleLogSegment { val numMessages = 2L sendMessages(numMessages.toInt) // give some time for the follower 1 to record leader HW - assertTrue("Failed to update highwatermark for follower after 1000 ms", - TestUtils.waitUntilTrue(() => - server2.replicaManager.getReplica(topic, 0).get.highWatermark == numMessages, 10000)) + TestUtils.waitUntilTrue(() => + server2.replicaManager.getReplica(topic, 0).get.highWatermark.messageOffset == numMessages, + "Failed to update high watermark for follower after timeout") servers.foreach(server => server.replicaManager.checkpointHighWatermarks()) - producer.close() val leaderHW = hwFile1.read.getOrElse(TopicAndPartition(topic, 0), 0L) assertEquals(numMessages, leaderHW) val followerHW = hwFile2.read.getOrElse(TopicAndPartition(topic, 0), 0L) @@ -97,22 +96,8 @@ class LogRecoveryTest extends JUnit3Suite with ZooKeeperTestHarness { } def testHWCheckpointWithFailuresSingleLogSegment { - // start both servers - server1 = TestUtils.createServer(configProps1) - server2 = TestUtils.createServer(configProps2) - servers ++= List(server1, server2) - - producer = new Producer[Int, String](new ProducerConfig(producerProps)) + var leader = 
waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId) - // create topic with 1 partition, 2 replicas, one on each broker - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(0->Seq(0,1))) - - // wait until leader is elected - var leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 500) - assertTrue("Leader should get elected", leader.isDefined) - // NOTE: this is to avoid transient test failures - assertTrue("Leader could be broker 0 or broker 1", (leader.getOrElse(-1) == 0) || (leader.getOrElse(-1) == 1)) - assertEquals(0L, hwFile1.read.getOrElse(TopicAndPartition(topic, 0), 0L)) sendMessages(1) @@ -124,13 +109,13 @@ class LogRecoveryTest extends JUnit3Suite with ZooKeeperTestHarness { assertEquals(hw, hwFile1.read.getOrElse(TopicAndPartition(topic, 0), 0L)) // check if leader moves to the other server - leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 500, leader) + leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, oldLeaderOpt = leader) assertEquals("Leader must move to broker 1", 1, leader.getOrElse(-1)) // bring the preferred replica back server1.startup() - leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 500) + leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId) assertTrue("Leader must remain on broker 1, in case of zookeeper session expiration it can move to broker 0", leader.isDefined && (leader.get == 0 || leader.get == 1)) @@ -140,50 +125,32 @@ class LogRecoveryTest extends JUnit3Suite with ZooKeeperTestHarness { assertEquals(hw, hwFile2.read.getOrElse(TopicAndPartition(topic, 0), 0L)) server2.startup() - leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 500, leader) + leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, oldLeaderOpt = leader) assertTrue("Leader must remain on broker 0, in case of zookeeper session expiration it can move to broker 1", leader.isDefined && (leader.get == 0 || leader.get == 1)) sendMessages(1) hw += 1 - + // give some time for follower 1 to record leader HW of 60 - assertTrue("Failed to update highwatermark for follower after 1000 ms", TestUtils.waitUntilTrue(() => - server2.replicaManager.getReplica(topic, 0).get.highWatermark == hw, 2000)) + TestUtils.waitUntilTrue(() => + server2.replicaManager.getReplica(topic, 0).get.highWatermark.messageOffset == hw, + "Failed to update high watermark for follower after timeout") // shutdown the servers to allow the hw to be checkpointed servers.foreach(server => server.shutdown()) - producer.close() assertEquals(hw, hwFile1.read.getOrElse(TopicAndPartition(topic, 0), 0L)) assertEquals(hw, hwFile2.read.getOrElse(TopicAndPartition(topic, 0), 0L)) } def testHWCheckpointNoFailuresMultipleLogSegments { - // start both servers - server1 = TestUtils.createServer(configs.head) - server2 = TestUtils.createServer(configs.last) - servers ++= List(server1, server2) - - hwFile1 = new OffsetCheckpoint(new File(server1.config.logDirs(0), ReplicaManager.HighWatermarkFilename)) - hwFile2 = new OffsetCheckpoint(new File(server2.config.logDirs(0), ReplicaManager.HighWatermarkFilename)) - - producer = new Producer[Int, String](new ProducerConfig(producerProps)) - - // create topic with 1 partition, 2 replicas, one on each broker - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(0->Seq(0,1))) - - // wait until leader is elected - var leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 
500) - assertTrue("Leader should get elected", leader.isDefined) - // NOTE: this is to avoid transient test failures - assertTrue("Leader could be broker 0 or broker 1", (leader.getOrElse(-1) == 0) || (leader.getOrElse(-1) == 1)) sendMessages(20) - var hw = 20L + val hw = 20L // give some time for follower 1 to record leader HW of 600 - assertTrue("Failed to update highwatermark for follower after 1000 ms", TestUtils.waitUntilTrue(() => - server2.replicaManager.getReplica(topic, 0).get.highWatermark == hw, 1000)) + TestUtils.waitUntilTrue(() => + server2.replicaManager.getReplica(topic, 0).get.highWatermark.messageOffset == hw, + "Failed to update high watermark for follower after timeout") // shutdown the servers to allow the hw to be checkpointed servers.foreach(server => server.shutdown()) - producer.close() val leaderHW = hwFile1.read.getOrElse(TopicAndPartition(topic, 0), 0L) assertEquals(hw, leaderHW) val followerHW = hwFile2.read.getOrElse(TopicAndPartition(topic, 0), 0L) @@ -191,31 +158,15 @@ class LogRecoveryTest extends JUnit3Suite with ZooKeeperTestHarness { } def testHWCheckpointWithFailuresMultipleLogSegments { - // start both servers - server1 = TestUtils.createServer(configs.head) - server2 = TestUtils.createServer(configs.last) - servers ++= List(server1, server2) - - hwFile1 = new OffsetCheckpoint(new File(server1.config.logDirs(0), ReplicaManager.HighWatermarkFilename)) - hwFile2 = new OffsetCheckpoint(new File(server2.config.logDirs(0), ReplicaManager.HighWatermarkFilename)) - - producer = new Producer[Int, String](new ProducerConfig(producerProps)) - - // create topic with 1 partition, 2 replicas, one on each broker - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, Map(0->Seq(server1.config.brokerId, server2.config.brokerId))) - - // wait until leader is elected - var leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 500) - assertTrue("Leader should get elected", leader.isDefined) - // NOTE: this is to avoid transient test failures - assertTrue("Leader could be broker 0 or broker 1", (leader.getOrElse(-1) == 0) || (leader.getOrElse(-1) == 1)) + var leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId) sendMessages(2) var hw = 2L - + // allow some time for the follower to get the leader HW - assertTrue("Failed to update highwatermark for follower after 1000 ms", TestUtils.waitUntilTrue(() => - server2.replicaManager.getReplica(topic, 0).get.highWatermark == hw, 1000)) + TestUtils.waitUntilTrue(() => + server2.replicaManager.getReplica(topic, 0).get.highWatermark.messageOffset == hw, + "Failed to update high watermark for follower after timeout") // kill the server hosting the preferred replica server1.shutdown() server2.shutdown() @@ -224,7 +175,7 @@ class LogRecoveryTest extends JUnit3Suite with ZooKeeperTestHarness { server2.startup() // check if leader moves to the other server - leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, 500, leader) + leader = waitUntilLeaderIsElectedOrChanged(zkClient, topic, partitionId, oldLeaderOpt = leader) assertEquals("Leader must move to broker 1", 1, leader.getOrElse(-1)) assertEquals(hw, hwFile1.read.getOrElse(TopicAndPartition(topic, 0), 0L)) @@ -237,13 +188,13 @@ class LogRecoveryTest extends JUnit3Suite with ZooKeeperTestHarness { sendMessages(2) hw += 2 - + // allow some time for the follower to get the leader HW - assertTrue("Failed to update highwatermark for follower after 1000 ms", TestUtils.waitUntilTrue(() => - 
server1.replicaManager.getReplica(topic, 0).get.highWatermark == hw, 1000)) + TestUtils.waitUntilTrue(() => + server1.replicaManager.getReplica(topic, 0).get.highWatermark.messageOffset == hw, + "Failed to update high watermark for follower after timeout") // shutdown the servers to allow the hw to be checkpointed servers.foreach(server => server.shutdown()) - producer.close() assertEquals(hw, hwFile1.read.getOrElse(TopicAndPartition(topic, 0), 0L)) assertEquals(hw, hwFile2.read.getOrElse(TopicAndPartition(topic, 0), 0L)) } diff --git a/core/src/test/scala/unit/kafka/server/OffsetCommitTest.scala b/core/src/test/scala/unit/kafka/server/OffsetCommitTest.scala index 6a96d80daac57..8c5364fa97da1 100644 --- a/core/src/test/scala/unit/kafka/server/OffsetCommitTest.scala +++ b/core/src/test/scala/unit/kafka/server/OffsetCommitTest.scala @@ -23,14 +23,13 @@ import junit.framework.Assert._ import java.util.Properties import kafka.consumer.SimpleConsumer import org.junit.{After, Before, Test} -import kafka.message.{NoCompressionCodec, ByteBufferMessageSet, Message} import kafka.zk.ZooKeeperTestHarness import org.scalatest.junit.JUnit3Suite -import kafka.api.{OffsetCommitRequest, OffsetFetchRequest} +import kafka.api.{ConsumerMetadataRequest, OffsetCommitRequest, OffsetFetchRequest} import kafka.utils.TestUtils._ -import kafka.common.{ErrorMapping, TopicAndPartition, OffsetMetadataAndError} +import kafka.common.{OffsetMetadataAndError, OffsetAndMetadata, ErrorMapping, TopicAndPartition} import scala.util.Random -import kafka.admin.AdminUtils +import scala.collection._ class OffsetCommitTest extends JUnit3Suite with ZooKeeperTestHarness { val random: Random = new Random() @@ -39,6 +38,7 @@ class OffsetCommitTest extends JUnit3Suite with ZooKeeperTestHarness { var server: KafkaServer = null var logSize: Int = 100 val brokerPort: Int = 9099 + val group = "test-group" var simpleConsumer: SimpleConsumer = null var time: Time = new MockTime() @@ -51,6 +51,14 @@ class OffsetCommitTest extends JUnit3Suite with ZooKeeperTestHarness { time = new MockTime() server = TestUtils.createServer(new KafkaConfig(config), time) simpleConsumer = new SimpleConsumer("localhost", brokerPort, 1000000, 64*1024, "test-client") + val consumerMetadataRequest = ConsumerMetadataRequest(group) + Stream.continually { + val consumerMetadataResponse = simpleConsumer.send(consumerMetadataRequest) + consumerMetadataResponse.coordinatorOpt.isDefined + }.dropWhile(success => { + if (!success) Thread.sleep(1000) + !success + }) } @After @@ -69,37 +77,36 @@ class OffsetCommitTest extends JUnit3Suite with ZooKeeperTestHarness { val topicAndPartition = TopicAndPartition(topic, 0) val expectedReplicaAssignment = Map(0 -> List(1)) // create the topic - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, expectedReplicaAssignment) - val leaderIdOpt = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 1000) - assertTrue("Leader should be elected after topic creation", leaderIdOpt.isDefined) - val commitRequest = OffsetCommitRequest("test-group", Map(topicAndPartition -> OffsetMetadataAndError(offset=42L))) + createTopic(zkClient, topic, partitionReplicaAssignment = expectedReplicaAssignment, servers = Seq(server)) + + val commitRequest = OffsetCommitRequest("test-group", immutable.Map(topicAndPartition -> OffsetAndMetadata(offset=42L))) val commitResponse = simpleConsumer.commitOffsets(commitRequest) - assertEquals(ErrorMapping.NoError, commitResponse.requestInfo.get(topicAndPartition).get) + 
assertEquals(ErrorMapping.NoError, commitResponse.commitStatus.get(topicAndPartition).get) // Fetch it and verify - val fetchRequest = OffsetFetchRequest("test-group", Seq(topicAndPartition)) + val fetchRequest = OffsetFetchRequest(group, Seq(topicAndPartition)) val fetchResponse = simpleConsumer.fetchOffsets(fetchRequest) assertEquals(ErrorMapping.NoError, fetchResponse.requestInfo.get(topicAndPartition).get.error) - //assertEquals(OffsetMetadataAndError.NoMetadata, fetchResponse.requestInfo.get(topicAndPartition).get.metadata) + assertEquals(OffsetAndMetadata.NoMetadata, fetchResponse.requestInfo.get(topicAndPartition).get.metadata) assertEquals(42L, fetchResponse.requestInfo.get(topicAndPartition).get.offset) // Commit a new offset - val commitRequest1 = OffsetCommitRequest("test-group", Map(topicAndPartition -> OffsetMetadataAndError( + val commitRequest1 = OffsetCommitRequest(group, immutable.Map(topicAndPartition -> OffsetAndMetadata( offset=100L, metadata="some metadata" ))) val commitResponse1 = simpleConsumer.commitOffsets(commitRequest1) - assertEquals(ErrorMapping.NoError, commitResponse1.requestInfo.get(topicAndPartition).get) + assertEquals(ErrorMapping.NoError, commitResponse1.commitStatus.get(topicAndPartition).get) // Fetch it and verify - val fetchRequest1 = OffsetFetchRequest("test-group", Seq(topicAndPartition)) + val fetchRequest1 = OffsetFetchRequest(group, Seq(topicAndPartition)) val fetchResponse1 = simpleConsumer.fetchOffsets(fetchRequest1) - + assertEquals(ErrorMapping.NoError, fetchResponse1.requestInfo.get(topicAndPartition).get.error) - //assertEquals("some metadata", fetchResponse1.requestInfo.get(topicAndPartition).get.metadata) + assertEquals("some metadata", fetchResponse1.requestInfo.get(topicAndPartition).get.metadata) assertEquals(100L, fetchResponse1.requestInfo.get(topicAndPartition).get.offset) } @@ -109,42 +116,34 @@ class OffsetCommitTest extends JUnit3Suite with ZooKeeperTestHarness { val topic1 = "topic-1" val topic2 = "topic-2" val topic3 = "topic-3" - val topic4 = "topic-4" - - val expectedReplicaAssignment = Map(0 -> List(1)) - // create the topic - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic1, expectedReplicaAssignment) - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic2, expectedReplicaAssignment) - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic3, expectedReplicaAssignment) - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic4, expectedReplicaAssignment) - var leaderIdOpt = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic1, 0, 1000) - assertTrue("Leader should be elected after topic creation", leaderIdOpt.isDefined) - leaderIdOpt = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic2, 0, 1000) - assertTrue("Leader should be elected after topic creation", leaderIdOpt.isDefined) - leaderIdOpt = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic3, 0, 1000) - assertTrue("Leader should be elected after topic creation", leaderIdOpt.isDefined) - leaderIdOpt = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic4, 0, 1000) - assertTrue("Leader should be elected after topic creation", leaderIdOpt.isDefined) - - val commitRequest = OffsetCommitRequest("test-group", Map( - TopicAndPartition(topic1, 0) -> OffsetMetadataAndError(offset=42L, metadata="metadata one"), - TopicAndPartition(topic2, 0) -> OffsetMetadataAndError(offset=43L, metadata="metadata two"), - TopicAndPartition(topic3, 0) -> 
OffsetMetadataAndError(offset=44L, metadata="metadata three"), - TopicAndPartition(topic2, 1) -> OffsetMetadataAndError(offset=45L) + val topic4 = "topic-4" // Topic that group never consumes + val topic5 = "topic-5" // Non-existent topic + + createTopic(zkClient, topic1, servers = Seq(server), numPartitions = 1) + createTopic(zkClient, topic2, servers = Seq(server), numPartitions = 2) + createTopic(zkClient, topic3, servers = Seq(server), numPartitions = 1) + createTopic(zkClient, topic4, servers = Seq(server), numPartitions = 1) + + val commitRequest = OffsetCommitRequest("test-group", immutable.Map( + TopicAndPartition(topic1, 0) -> OffsetAndMetadata(offset=42L, metadata="metadata one"), + TopicAndPartition(topic2, 0) -> OffsetAndMetadata(offset=43L, metadata="metadata two"), + TopicAndPartition(topic3, 0) -> OffsetAndMetadata(offset=44L, metadata="metadata three"), + TopicAndPartition(topic2, 1) -> OffsetAndMetadata(offset=45L) )) val commitResponse = simpleConsumer.commitOffsets(commitRequest) - assertEquals(ErrorMapping.NoError, commitResponse.requestInfo.get(TopicAndPartition(topic1, 0)).get) - assertEquals(ErrorMapping.NoError, commitResponse.requestInfo.get(TopicAndPartition(topic2, 0)).get) - assertEquals(ErrorMapping.NoError, commitResponse.requestInfo.get(TopicAndPartition(topic3, 0)).get) - assertEquals(ErrorMapping.NoError, commitResponse.requestInfo.get(TopicAndPartition(topic2, 1)).get) + assertEquals(ErrorMapping.NoError, commitResponse.commitStatus.get(TopicAndPartition(topic1, 0)).get) + assertEquals(ErrorMapping.NoError, commitResponse.commitStatus.get(TopicAndPartition(topic2, 0)).get) + assertEquals(ErrorMapping.NoError, commitResponse.commitStatus.get(TopicAndPartition(topic3, 0)).get) + assertEquals(ErrorMapping.NoError, commitResponse.commitStatus.get(TopicAndPartition(topic2, 1)).get) - val fetchRequest = OffsetFetchRequest("test-group", Seq( + val fetchRequest = OffsetFetchRequest(group, Seq( TopicAndPartition(topic1, 0), TopicAndPartition(topic2, 0), TopicAndPartition(topic3, 0), TopicAndPartition(topic2, 1), TopicAndPartition(topic3, 1), // An unknown partition - TopicAndPartition(topic4, 0) // An unknown topic + TopicAndPartition(topic4, 0), // An unused topic + TopicAndPartition(topic5, 0) // An unknown topic )) val fetchResponse = simpleConsumer.fetchOffsets(fetchRequest) @@ -153,61 +152,51 @@ class OffsetCommitTest extends JUnit3Suite with ZooKeeperTestHarness { assertEquals(ErrorMapping.NoError, fetchResponse.requestInfo.get(TopicAndPartition(topic3, 0)).get.error) assertEquals(ErrorMapping.NoError, fetchResponse.requestInfo.get(TopicAndPartition(topic2, 1)).get.error) assertEquals(ErrorMapping.UnknownTopicOrPartitionCode, fetchResponse.requestInfo.get(TopicAndPartition(topic3, 1)).get.error) - assertEquals(ErrorMapping.UnknownTopicOrPartitionCode, fetchResponse.requestInfo.get(TopicAndPartition(topic4, 0)).get.error) - - //assertEquals("metadata one", fetchResponse.requestInfo.get(TopicAndPartition(topic1, 0)).get.metadata) - //assertEquals("metadata two", fetchResponse.requestInfo.get(TopicAndPartition(topic2, 0)).get.metadata) - //assertEquals("metadata three", fetchResponse.requestInfo.get(TopicAndPartition(topic3, 0)).get.metadata) - //assertEquals(OffsetMetadataAndError.NoMetadata, fetchResponse.requestInfo.get(TopicAndPartition(topic2, 1)).get.metadata) - //assertEquals(OffsetMetadataAndError.NoMetadata, fetchResponse.requestInfo.get(TopicAndPartition(topic3, 1)).get.metadata) - //assertEquals(OffsetMetadataAndError.NoMetadata, 
fetchResponse.requestInfo.get(TopicAndPartition(topic4, 0)).get.metadata) + assertEquals(ErrorMapping.NoError, fetchResponse.requestInfo.get(TopicAndPartition(topic4, 0)).get.error) + assertEquals(ErrorMapping.UnknownTopicOrPartitionCode, fetchResponse.requestInfo.get(TopicAndPartition(topic5, 0)).get.error) + assertEquals(OffsetMetadataAndError.UnknownTopicOrPartition, fetchResponse.requestInfo.get(TopicAndPartition(topic3, 1)).get) + assertEquals(OffsetMetadataAndError.NoOffset, fetchResponse.requestInfo.get(TopicAndPartition(topic4, 0)).get) + assertEquals(OffsetMetadataAndError.UnknownTopicOrPartition, fetchResponse.requestInfo.get(TopicAndPartition(topic5, 0)).get) + + assertEquals("metadata one", fetchResponse.requestInfo.get(TopicAndPartition(topic1, 0)).get.metadata) + assertEquals("metadata two", fetchResponse.requestInfo.get(TopicAndPartition(topic2, 0)).get.metadata) + assertEquals("metadata three", fetchResponse.requestInfo.get(TopicAndPartition(topic3, 0)).get.metadata) + assertEquals(OffsetAndMetadata.NoMetadata, fetchResponse.requestInfo.get(TopicAndPartition(topic2, 1)).get.metadata) + assertEquals(OffsetAndMetadata.NoMetadata, fetchResponse.requestInfo.get(TopicAndPartition(topic3, 1)).get.metadata) + assertEquals(OffsetAndMetadata.NoMetadata, fetchResponse.requestInfo.get(TopicAndPartition(topic4, 0)).get.metadata) + assertEquals(OffsetAndMetadata.NoMetadata, fetchResponse.requestInfo.get(TopicAndPartition(topic5, 0)).get.metadata) assertEquals(42L, fetchResponse.requestInfo.get(TopicAndPartition(topic1, 0)).get.offset) assertEquals(43L, fetchResponse.requestInfo.get(TopicAndPartition(topic2, 0)).get.offset) assertEquals(44L, fetchResponse.requestInfo.get(TopicAndPartition(topic3, 0)).get.offset) assertEquals(45L, fetchResponse.requestInfo.get(TopicAndPartition(topic2, 1)).get.offset) - assertEquals(OffsetMetadataAndError.InvalidOffset, fetchResponse.requestInfo.get(TopicAndPartition(topic3, 1)).get.offset) - assertEquals(OffsetMetadataAndError.InvalidOffset, fetchResponse.requestInfo.get(TopicAndPartition(topic4, 0)).get.offset) + assertEquals(OffsetAndMetadata.InvalidOffset, fetchResponse.requestInfo.get(TopicAndPartition(topic3, 1)).get.offset) + assertEquals(OffsetAndMetadata.InvalidOffset, fetchResponse.requestInfo.get(TopicAndPartition(topic4, 0)).get.offset) + assertEquals(OffsetAndMetadata.InvalidOffset, fetchResponse.requestInfo.get(TopicAndPartition(topic5, 0)).get.offset) } @Test def testLargeMetadataPayload() { val topicAndPartition = TopicAndPartition("large-metadata", 0) val expectedReplicaAssignment = Map(0 -> List(1)) - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topicAndPartition.topic, expectedReplicaAssignment) - var leaderIdOpt = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topicAndPartition.topic, 0, 1000) - assertTrue("Leader should be elected after topic creation", leaderIdOpt.isDefined) + createTopic(zkClient, topicAndPartition.topic, partitionReplicaAssignment = expectedReplicaAssignment, + servers = Seq(server)) - val commitRequest = OffsetCommitRequest("test-group", Map(topicAndPartition -> OffsetMetadataAndError( + val commitRequest = OffsetCommitRequest("test-group", immutable.Map(topicAndPartition -> OffsetAndMetadata( offset=42L, metadata=random.nextString(server.config.offsetMetadataMaxSize) ))) val commitResponse = simpleConsumer.commitOffsets(commitRequest) - assertEquals(ErrorMapping.NoError, commitResponse.requestInfo.get(topicAndPartition).get) + assertEquals(ErrorMapping.NoError, 
commitResponse.commitStatus.get(topicAndPartition).get) - val commitRequest1 = OffsetCommitRequest("test-group", Map(topicAndPartition -> OffsetMetadataAndError( + val commitRequest1 = OffsetCommitRequest(group, immutable.Map(topicAndPartition -> OffsetAndMetadata( offset=42L, metadata=random.nextString(server.config.offsetMetadataMaxSize + 1) ))) val commitResponse1 = simpleConsumer.commitOffsets(commitRequest1) - assertEquals(ErrorMapping.OffsetMetadataTooLargeCode, commitResponse1.requestInfo.get(topicAndPartition).get) + assertEquals(ErrorMapping.OffsetMetadataTooLargeCode, commitResponse1.commitStatus.get(topicAndPartition).get) } - - @Test - def testNullMetadata() { - val topicAndPartition = TopicAndPartition("null-metadata", 0) - val expectedReplicaAssignment = Map(0 -> List(1)) - AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topicAndPartition.topic, expectedReplicaAssignment) - var leaderIdOpt = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topicAndPartition.topic, 0, 1000) - assertTrue("Leader should be elected after topic creation", leaderIdOpt.isDefined) - val commitRequest = OffsetCommitRequest("test-group", Map(topicAndPartition -> OffsetMetadataAndError( - offset=42L, - metadata=null - ))) - val commitResponse = simpleConsumer.commitOffsets(commitRequest) - assertEquals(ErrorMapping.NoError, commitResponse.requestInfo.get(topicAndPartition).get) - } } diff --git a/core/src/test/scala/unit/kafka/server/ReplicaFetchTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaFetchTest.scala index dd85c71816715..da4bafc1e2a94 100644 --- a/core/src/test/scala/unit/kafka/server/ReplicaFetchTest.scala +++ b/core/src/test/scala/unit/kafka/server/ReplicaFetchTest.scala @@ -22,13 +22,12 @@ import kafka.zk.ZooKeeperTestHarness import kafka.utils.TestUtils._ import kafka.producer.KeyedMessage import kafka.serializer.StringEncoder -import kafka.admin.AdminUtils import kafka.utils.TestUtils import junit.framework.Assert._ import kafka.common._ class ReplicaFetchTest extends JUnit3Suite with ZooKeeperTestHarness { - val props = createBrokerConfigs(2) + val props = createBrokerConfigs(2,false) val configs = props.map(p => new KafkaConfig(p)) var brokers: Seq[KafkaServer] = null val topic1 = "foo" @@ -51,14 +50,13 @@ class ReplicaFetchTest extends JUnit3Suite with ZooKeeperTestHarness { // create a topic and partition and await leadership for (topic <- List(topic1,topic2)) { - AdminUtils.createTopic(zkClient, topic, 1, 2) - TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0, 1000) + createTopic(zkClient, topic, numPartitions = 1, replicationFactor = 2, servers = brokers) } // send test messages to leader - val producer = TestUtils.createProducer[String, String](TestUtils.getBrokerListStrFromConfigs(configs), - new StringEncoder(), - new StringEncoder()) + val producer = TestUtils.createProducer[String, String](TestUtils.getBrokerListStrFromConfigs(configs), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[StringEncoder].getName) val messages = testMessageList1.map(m => new KeyedMessage(topic1, m, m)) ++ testMessageList2.map(m => new KeyedMessage(topic2, m, m)) producer.send(messages:_*) producer.close() @@ -73,6 +71,6 @@ class ReplicaFetchTest extends JUnit3Suite with ZooKeeperTestHarness { } result } - assertTrue("Broker logs should be identical", waitUntilTrue(logsMatch, 6000)) + waitUntilTrue(logsMatch, "Broker logs should be identical") } -} \ No newline at end of file +} diff --git 
a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala new file mode 100644 index 0000000000000..faa907131ed0a --- /dev/null +++ b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.server + +import kafka.utils.{MockScheduler, MockTime, TestUtils} + +import java.util.concurrent.atomic.AtomicBoolean +import java.io.File + +import org.easymock.EasyMock +import org.I0Itec.zkclient.ZkClient +import org.scalatest.junit.JUnit3Suite +import org.junit.Test + +class ReplicaManagerTest extends JUnit3Suite { + + val topic = "test-topic" + + @Test + def testHighWaterMarkDirectoryMapping() { + val props = TestUtils.createBrokerConfig(1) + val config = new KafkaConfig(props) + val zkClient = EasyMock.createMock(classOf[ZkClient]) + val mockLogMgr = TestUtils.createLogManager(config.logDirs.map(new File(_)).toArray) + val time: MockTime = new MockTime() + val rm = new ReplicaManager(config, time, zkClient, new MockScheduler(time), mockLogMgr, new AtomicBoolean(false)) + val partition = rm.getOrCreatePartition(topic, 1) + partition.getOrCreateReplica(1) + rm.checkpointHighWatermarks() + + // shutdown the replica manager upon test completion + rm.shutdown(false) + } + + @Test + def testHighwaterMarkRelativeDirectoryMapping() { + val props = TestUtils.createBrokerConfig(1) + props.put("log.dir", TestUtils.tempRelativeDir("data").getAbsolutePath) + val config = new KafkaConfig(props) + val zkClient = EasyMock.createMock(classOf[ZkClient]) + val mockLogMgr = TestUtils.createLogManager(config.logDirs.map(new File(_)).toArray) + val time: MockTime = new MockTime() + val rm = new ReplicaManager(config, time, zkClient, new MockScheduler(time), mockLogMgr, new AtomicBoolean(false)) + val partition = rm.getOrCreatePartition(topic, 1) + partition.getOrCreateReplica(1) + rm.checkpointHighWatermarks() + + // shutdown the replica manager upon test completion + rm.shutdown(false) + } +} diff --git a/core/src/test/scala/unit/kafka/server/RequestPurgatoryTest.scala b/core/src/test/scala/unit/kafka/server/RequestPurgatoryTest.scala deleted file mode 100644 index 4f61f8469df99..0000000000000 --- a/core/src/test/scala/unit/kafka/server/RequestPurgatoryTest.scala +++ /dev/null @@ -1,94 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package kafka.server - -import scala.collection._ -import org.junit.Test -import junit.framework.Assert._ -import kafka.message._ -import kafka.api._ -import kafka.utils.TestUtils -import org.scalatest.junit.JUnit3Suite - - -class RequestPurgatoryTest extends JUnit3Suite { - - val producerRequest1 = TestUtils.produceRequest("test", 0, new ByteBufferMessageSet(new Message("hello1".getBytes))) - val producerRequest2 = TestUtils.produceRequest("test", 0, new ByteBufferMessageSet(new Message("hello2".getBytes))) - var purgatory: MockRequestPurgatory = null - - override def setUp() { - super.setUp() - purgatory = new MockRequestPurgatory() - } - - override def tearDown() { - purgatory.shutdown() - super.tearDown() - } - - @Test - def testRequestSatisfaction() { - val r1 = new DelayedRequest(Array("test1"), null, 100000L) - val r2 = new DelayedRequest(Array("test2"), null, 100000L) - assertEquals("With no waiting requests, nothing should be satisfied", 0, purgatory.update("test1", producerRequest1).size) - purgatory.watch(r1) - assertEquals("Still nothing satisfied", 0, purgatory.update("test1", producerRequest1).size) - purgatory.watch(r2) - assertEquals("Still nothing satisfied", 0, purgatory.update("test2", producerRequest2).size) - purgatory.satisfied += r1 - assertEquals("r1 satisfied", mutable.ArrayBuffer(r1), purgatory.update("test1", producerRequest1)) - assertEquals("Nothing satisfied", 0, purgatory.update("test1", producerRequest2).size) - purgatory.satisfied += r2 - assertEquals("r2 satisfied", mutable.ArrayBuffer(r2), purgatory.update("test2", producerRequest2)) - assertEquals("Nothing satisfied", 0, purgatory.update("test2", producerRequest2).size) - } - - @Test - def testRequestExpiry() { - val expiration = 20L - val r1 = new DelayedRequest(Array("test1"), null, expiration) - val r2 = new DelayedRequest(Array("test1"), null, 200000L) - val start = System.currentTimeMillis - purgatory.watch(r1) - purgatory.watch(r2) - purgatory.awaitExpiration(r1) - val elapsed = System.currentTimeMillis - start - assertTrue("r1 expired", purgatory.expired.contains(r1)) - assertTrue("r2 hasn't expired", !purgatory.expired.contains(r2)) - assertTrue("Time for expiration %d should at least %d".format(elapsed, expiration), elapsed >= expiration) - } - - class MockRequestPurgatory extends RequestPurgatory[DelayedRequest, ProducerRequest] { - val satisfied = mutable.Set[DelayedRequest]() - val expired = mutable.Set[DelayedRequest]() - def awaitExpiration(delayed: DelayedRequest) = { - delayed synchronized { - delayed.wait() - } - } - def checkSatisfied(request: ProducerRequest, delayed: DelayedRequest): Boolean = satisfied.contains(delayed) - def expire(delayed: DelayedRequest) { - expired += delayed - delayed synchronized { - delayed.notify() - } - } - } - -} \ No newline at end of file diff --git a/core/src/test/scala/unit/kafka/server/ServerShutdownTest.scala b/core/src/test/scala/unit/kafka/server/ServerShutdownTest.scala index 20fe93e623319..ba1e48e4300c9 100644 --- a/core/src/test/scala/unit/kafka/server/ServerShutdownTest.scala +++ 
b/core/src/test/scala/unit/kafka/server/ServerShutdownTest.scala @@ -16,19 +16,20 @@ */ package kafka.server -import java.io.File -import kafka.consumer.SimpleConsumer -import org.junit.Test -import junit.framework.Assert._ -import kafka.message.ByteBufferMessageSet -import org.scalatest.junit.JUnit3Suite import kafka.zk.ZooKeeperTestHarness +import kafka.consumer.SimpleConsumer import kafka.producer._ -import kafka.utils.IntEncoder +import kafka.utils.{IntEncoder, TestUtils, Utils} import kafka.utils.TestUtils._ -import kafka.admin.AdminUtils import kafka.api.FetchRequestBuilder -import kafka.utils.{TestUtils, Utils} +import kafka.message.ByteBufferMessageSet +import kafka.serializer.StringEncoder + +import java.io.File + +import org.junit.Test +import org.scalatest.junit.JUnit3Suite +import junit.framework.Assert._ class ServerShutdownTest extends JUnit3Suite with ZooKeeperTestHarness { val port = TestUtils.choosePort @@ -44,13 +45,12 @@ class ServerShutdownTest extends JUnit3Suite with ZooKeeperTestHarness { def testCleanShutdown() { var server = new KafkaServer(config) server.startup() - val producerConfig = getProducerConfig(TestUtils.getBrokerListStrFromConfigs(Seq(config))) - producerConfig.put("key.serializer.class", classOf[IntEncoder].getName.toString) - var producer = new Producer[Int, String](new ProducerConfig(producerConfig)) + var producer = TestUtils.createProducer[Int, String](TestUtils.getBrokerListStrFromConfigs(Seq(config)), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[IntEncoder].getName) // create topic - AdminUtils.createTopic(zkClient, topic, 1, 1) - TestUtils.waitUntilMetadataIsPropagated(Seq(server), topic, 0, 1000) + createTopic(zkClient, topic, numPartitions = 1, replicationFactor = 1, servers = Seq(server)) // send some messages producer.send(sent1.map(m => new KeyedMessage[Int, String](topic, 0, m)):_*) @@ -69,9 +69,11 @@ class ServerShutdownTest extends JUnit3Suite with ZooKeeperTestHarness { server.startup() // wait for the broker to receive the update metadata request after startup - TestUtils.waitUntilMetadataIsPropagated(Seq(server), topic, 0, 1000) + TestUtils.waitUntilMetadataIsPropagated(Seq(server), topic, 0) - producer = new Producer[Int, String](new ProducerConfig(producerConfig)) + producer = TestUtils.createProducer[Int, String](TestUtils.getBrokerListStrFromConfigs(Seq(config)), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[IntEncoder].getName) val consumer = new SimpleConsumer(host, port, 1000000, 64*1024, "") var fetchedMessage: ByteBufferMessageSet = null @@ -96,5 +98,74 @@ class ServerShutdownTest extends JUnit3Suite with ZooKeeperTestHarness { producer.close() server.shutdown() Utils.rm(server.config.logDirs) + verifyNonDaemonThreadsStatus + } + + @Test + def testCleanShutdownWithDeleteTopicEnabled() { + val newProps = TestUtils.createBrokerConfig(0, port) + newProps.setProperty("delete.topic.enable", "true") + val newConfig = new KafkaConfig(newProps) + val server = new KafkaServer(newConfig) + server.startup() + server.shutdown() + server.awaitShutdown() + Utils.rm(server.config.logDirs) + verifyNonDaemonThreadsStatus + } + + @Test + def testCleanShutdownAfterFailedStartup() { + val newProps = TestUtils.createBrokerConfig(0, port) + newProps.setProperty("zookeeper.connect", "fakehostthatwontresolve:65535") + val newConfig = new KafkaConfig(newProps) + var server = new KafkaServer(newConfig) + try { + server.startup() + fail("Expected KafkaServer setup to fail, throw exception") + } + catch { + // 
Try to clean up carefully without hanging even if the test fails. This means trying to accurately + // identify the correct exception, making sure the server was shutdown, and cleaning up if anything + // goes wrong so that awaitShutdown doesn't hang + case e: org.I0Itec.zkclient.exception.ZkException => + assertEquals(server.brokerState.currentState, NotRunning.state) + if (server.brokerState.currentState != NotRunning.state) + server.shutdown() + case e: Throwable => + fail("Expected KafkaServer setup to fail with connection exception but caught a different exception.") + server.shutdown() + } + server.awaitShutdown() + Utils.rm(server.config.logDirs) + verifyNonDaemonThreadsStatus + } + + private[this] def isNonDaemonKafkaThread(t: Thread): Boolean = { + val threadName = Option(t.getClass.getCanonicalName) + .getOrElse(t.getClass.getName()) + .toLowerCase + + !t.isDaemon && t.isAlive && threadName.startsWith("kafka") + } + + def verifyNonDaemonThreadsStatus() { + assertEquals(0, Thread.getAllStackTraces.keySet().toArray + .map{ _.asInstanceOf[Thread] } + .count(isNonDaemonKafkaThread)) + } + + def testConsecutiveShutdown(){ + val server = new KafkaServer(config) + try { + server.startup() + server.shutdown() + server.awaitShutdown() + server.shutdown() + assertTrue(true); + } + catch{ + case ex => fail() + } } } diff --git a/core/src/test/scala/unit/kafka/server/ServerStartupTest.scala b/core/src/test/scala/unit/kafka/server/ServerStartupTest.scala new file mode 100644 index 0000000000000..8fe7cd496f74a --- /dev/null +++ b/core/src/test/scala/unit/kafka/server/ServerStartupTest.scala @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
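The shutdown tests above all end with verifyNonDaemonThreadsStatus, which asserts that no live, non-daemon "kafka"-prefixed threads survive broker shutdown. The same check can be expressed as a small standalone helper; everything below uses only JDK thread introspection, and the object and method names are illustrative:

    import scala.collection.JavaConversions._

    object ThreadLeakCheck {
      // Live, non-daemon threads whose implementing class name starts with the prefix.
      def leakedThreads(prefix: String): Seq[Thread] =
        Thread.getAllStackTraces.keySet.toSeq.filter { t =>
          val className = Option(t.getClass.getCanonicalName)
            .getOrElse(t.getClass.getName)
            .toLowerCase
          !t.isDaemon && t.isAlive && className.startsWith(prefix.toLowerCase)
        }

      def main(args: Array[String]) {
        // After a clean broker shutdown this count is expected to be zero.
        println("leaked kafka threads: " + leakedThreads("kafka").size)
      }
    }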
+ */ + +package kafka.server + +import org.scalatest.junit.JUnit3Suite +import kafka.utils.ZkUtils +import kafka.utils.Utils +import kafka.utils.TestUtils + +import kafka.zk.ZooKeeperTestHarness +import junit.framework.Assert._ + +class ServerStartupTest extends JUnit3Suite with ZooKeeperTestHarness { + var server : KafkaServer = null + val brokerId = 0 + val zookeeperChroot = "/kafka-chroot-for-unittest" + + override def setUp() { + super.setUp() + val props = TestUtils.createBrokerConfig(brokerId, TestUtils.choosePort()) + val zooKeeperConnect = props.get("zookeeper.connect") + props.put("zookeeper.connect", zooKeeperConnect + zookeeperChroot) + server = TestUtils.createServer(new KafkaConfig(props)) + } + + override def tearDown() { + server.shutdown() + Utils.rm(server.config.logDirs) + super.tearDown() + } + + def testBrokerCreatesZKChroot { + val pathExists = ZkUtils.pathExists(zkClient, zookeeperChroot) + assertTrue(pathExists) + } + + def testServerStartupConsecutively() { + server.shutdown() + try { + intercept[IllegalStateException]{ + server.startup() + server.startup() + } + } + finally { + server.shutdown() + } + + } + +} \ No newline at end of file diff --git a/core/src/test/scala/unit/kafka/server/SimpleFetchTest.scala b/core/src/test/scala/unit/kafka/server/SimpleFetchTest.scala index 1317b4c3c60b8..ccf5e2e36260b 100644 --- a/core/src/test/scala/unit/kafka/server/SimpleFetchTest.scala +++ b/core/src/test/scala/unit/kafka/server/SimpleFetchTest.scala @@ -16,17 +16,22 @@ */ package kafka.server -import kafka.cluster.{Partition, Replica} +import kafka.api._ +import kafka.utils._ +import kafka.cluster.Replica +import kafka.common.TopicAndPartition import kafka.log.Log import kafka.message.{ByteBufferMessageSet, Message} -import kafka.network.RequestChannel -import kafka.utils.{ZkUtils, Time, TestUtils, MockTime} + +import scala.Some +import java.util.Collections +import java.util.concurrent.atomic.AtomicBoolean +import collection.JavaConversions._ + import org.easymock.EasyMock import org.I0Itec.zkclient.ZkClient import org.scalatest.junit.JUnit3Suite -import kafka.api._ -import scala.Some -import kafka.common.TopicAndPartition +import junit.framework.Assert._ class SimpleFetchTest extends JUnit3Suite { @@ -35,178 +40,102 @@ class SimpleFetchTest extends JUnit3Suite { override val replicaFetchWaitMaxMs = 100 override val replicaLagMaxMessages = 10L }) - val topic = "foo" - val partitionId = 0 - /** - * The scenario for this test is that there is one topic, "test-topic", one broker "0" that has - * one partition with one follower replica on broker "1". The leader replica on "0" - * has HW of "5" and LEO of "20". The follower on broker "1" has a local replica - * with a HW matching the leader's ("5") and LEO of "15", meaning it's not in-sync - * but is still in ISR (hasn't yet expired from ISR). - * - * When a normal consumer fetches data, it only should only see data upto the HW of the leader, - * in this case up an offset of "5". 
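ServerStartupTest relies on the convention that anything after the host:port list in zookeeper.connect is treated as a ZooKeeper chroot, which the broker creates on startup and which ZkUtils.pathExists can then confirm. A small sketch of assembling such a connect string; the helper and object names are made up for illustration:

    object ChrootConnectSketch {
      // Append a chroot path to a plain host:port connect string.
      def withChroot(zkConnect: String, chroot: String): String = {
        require(chroot.startsWith("/"), "chroot must be an absolute path")
        zkConnect + chroot
      }

      def main(args: Array[String]) {
        // Prints "localhost:2181/kafka-chroot-for-unittest"
        println(withChroot("localhost:2181", "/kafka-chroot-for-unittest"))
      }
    }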
- */ - def testNonReplicaSeesHwWhenFetching() { - /* setup */ - val time = new MockTime - val leo = 20 - val hw = 5 - val fetchSize = 100 - val messages = new Message("test-message".getBytes()) - - // create nice mock since we don't particularly care about zkclient calls - val zkClient = EasyMock.createNiceMock(classOf[ZkClient]) - EasyMock.expect(zkClient.exists(ZkUtils.ControllerEpochPath)).andReturn(false) - EasyMock.replay(zkClient) - - val log = EasyMock.createMock(classOf[kafka.log.Log]) - EasyMock.expect(log.logEndOffset).andReturn(leo).anyTimes() - EasyMock.expect(log) - EasyMock.expect(log.read(0, fetchSize, Some(hw))).andReturn(new ByteBufferMessageSet(messages)) - EasyMock.replay(log) + // set the replica manager with the partition + val time = new MockTime + val leaderLEO = 20L + val followerLEO = 15L + val partitionHW = 5 - val logManager = EasyMock.createMock(classOf[kafka.log.LogManager]) - EasyMock.expect(logManager.getLog(TopicAndPartition(topic, partitionId))).andReturn(Some(log)).anyTimes() - EasyMock.replay(logManager) + val fetchSize = 100 + val messagesToHW = new Message("messageToHW".getBytes()) + val messagesToLEO = new Message("messageToLEO".getBytes()) - val replicaManager = EasyMock.createMock(classOf[kafka.server.ReplicaManager]) - EasyMock.expect(replicaManager.config).andReturn(configs.head) - EasyMock.expect(replicaManager.logManager).andReturn(logManager) - EasyMock.expect(replicaManager.replicaFetcherManager).andReturn(EasyMock.createMock(classOf[ReplicaFetcherManager])) - EasyMock.expect(replicaManager.zkClient).andReturn(zkClient) - EasyMock.replay(replicaManager) - - val partition = getPartitionWithAllReplicasInISR(topic, partitionId, time, configs.head.brokerId, log, hw, replicaManager) - partition.getReplica(configs(1).brokerId).get.logEndOffset = leo - 5L - - EasyMock.reset(replicaManager) - EasyMock.expect(replicaManager.config).andReturn(configs.head).anyTimes() - EasyMock.expect(replicaManager.getLeaderReplicaIfLocal(topic, partitionId)).andReturn(partition.leaderReplicaIfLocal().get).anyTimes() - EasyMock.replay(replicaManager) - - val controller = EasyMock.createMock(classOf[kafka.controller.KafkaController]) - - // start a request channel with 2 processors and a queue size of 5 (this is more or less arbitrary) - // don't provide replica or leader callbacks since they will not be tested here - val requestChannel = new RequestChannel(2, 5) - val apis = new KafkaApis(requestChannel, replicaManager, zkClient, configs.head.brokerId, configs.head, controller) - val partitionStateInfo = EasyMock.createNiceMock(classOf[PartitionStateInfo]) - apis.metadataCache.put(TopicAndPartition(topic, partitionId), partitionStateInfo) - EasyMock.replay(partitionStateInfo) - // This request (from a follower) wants to read up to 2*HW but should only get back up to HW bytes into the log - val goodFetch = new FetchRequestBuilder() - .replicaId(Request.OrdinaryConsumerId) - .addFetch(topic, partitionId, 0, fetchSize) - .build() - val goodFetchBB = TestUtils.createRequestByteBuffer(goodFetch) - - // send the request - apis.handleFetchRequest(new RequestChannel.Request(processor=1, requestKey=5, buffer=goodFetchBB, startTimeMs=1)) - - // make sure the log only reads bytes between 0->HW (5) - EasyMock.verify(log) - } + val topic = "test-topic" + val partitionId = 0 + val topicAndPartition = TopicAndPartition(topic, partitionId) - /** - * The scenario for this test is that there is one topic, "test-topic", on broker "0" that has - * one partition with one follower replica on 
broker "1". The leader replica on "0" - * has HW of "5" and LEO of "20". The follower on broker "1" has a local replica - * with a HW matching the leader's ("5") and LEO of "15", meaning it's not in-sync - * but is still in ISR (hasn't yet expired from ISR). - * - * When the follower from broker "1" fetches data, it should see data upto the log end offset ("20") - */ - def testReplicaSeesLeoWhenFetching() { - /* setup */ - val time = new MockTime - val leo = 20 - val hw = 5 + val fetchInfo = Collections.singletonMap(topicAndPartition, PartitionFetchInfo(0, fetchSize)).toMap - val messages = new Message("test-message".getBytes()) + var replicaManager: ReplicaManager = null - val followerReplicaId = configs(1).brokerId - val followerLEO = 15 + override def setUp() { + super.setUp() + // create nice mock since we don't particularly care about zkclient calls val zkClient = EasyMock.createNiceMock(classOf[ZkClient]) - EasyMock.expect(zkClient.exists(ZkUtils.ControllerEpochPath)).andReturn(false) EasyMock.replay(zkClient) - val log = EasyMock.createMock(classOf[kafka.log.Log]) - EasyMock.expect(log.logEndOffset).andReturn(leo).anyTimes() - EasyMock.expect(log.read(followerLEO, Integer.MAX_VALUE, None)).andReturn(new ByteBufferMessageSet(messages)) + // create nice mock since we don't particularly care about scheduler calls + val scheduler = EasyMock.createNiceMock(classOf[KafkaScheduler]) + EasyMock.replay(scheduler) + + // create the log which takes read with either HW max offset or none max offset + val log = EasyMock.createMock(classOf[Log]) + EasyMock.expect(log.logEndOffset).andReturn(leaderLEO).anyTimes() + EasyMock.expect(log.read(0, fetchSize, Some(partitionHW))).andReturn( + new FetchDataInfo( + new LogOffsetMetadata(0L, 0L, 0), + new ByteBufferMessageSet(messagesToHW) + )).anyTimes() + EasyMock.expect(log.read(0, fetchSize, None)).andReturn( + new FetchDataInfo( + new LogOffsetMetadata(0L, 0L, 0), + new ByteBufferMessageSet(messagesToLEO) + )).anyTimes() EasyMock.replay(log) + // create the log manager that is aware of this mock log val logManager = EasyMock.createMock(classOf[kafka.log.LogManager]) - EasyMock.expect(logManager.getLog(TopicAndPartition(topic, 0))).andReturn(Some(log)).anyTimes() + EasyMock.expect(logManager.getLog(topicAndPartition)).andReturn(Some(log)).anyTimes() EasyMock.replay(logManager) - val replicaManager = EasyMock.createMock(classOf[kafka.server.ReplicaManager]) - EasyMock.expect(replicaManager.config).andReturn(configs.head) - EasyMock.expect(replicaManager.logManager).andReturn(logManager) - EasyMock.expect(replicaManager.replicaFetcherManager).andReturn(EasyMock.createMock(classOf[ReplicaFetcherManager])) - EasyMock.expect(replicaManager.zkClient).andReturn(zkClient) - EasyMock.replay(replicaManager) - - val partition = getPartitionWithAllReplicasInISR(topic, partitionId, time, configs.head.brokerId, log, hw, replicaManager) - partition.getReplica(followerReplicaId).get.logEndOffset = followerLEO.asInstanceOf[Long] - - EasyMock.reset(replicaManager) - EasyMock.expect(replicaManager.config).andReturn(configs.head).anyTimes() - EasyMock.expect(replicaManager.recordFollowerPosition(topic, partitionId, followerReplicaId, followerLEO)) - EasyMock.expect(replicaManager.getReplica(topic, partitionId, followerReplicaId)).andReturn(partition.inSyncReplicas.find(_.brokerId == configs(1).brokerId)) - EasyMock.expect(replicaManager.getLeaderReplicaIfLocal(topic, partitionId)).andReturn(partition.leaderReplicaIfLocal().get).anyTimes() - EasyMock.replay(replicaManager) 
- - val controller = EasyMock.createMock(classOf[kafka.controller.KafkaController]) - - val requestChannel = new RequestChannel(2, 5) - val apis = new KafkaApis(requestChannel, replicaManager, zkClient, configs.head.brokerId, configs.head, controller) - val partitionStateInfo = EasyMock.createNiceMock(classOf[PartitionStateInfo]) - apis.metadataCache.put(TopicAndPartition(topic, partitionId), partitionStateInfo) - EasyMock.replay(partitionStateInfo) - - /** - * This fetch, coming from a replica, requests all data at offset "15". Because the request is coming - * from a follower, the leader should oblige and read beyond the HW. - */ - val bigFetch = new FetchRequestBuilder() - .replicaId(followerReplicaId) - .addFetch(topic, partitionId, followerLEO, Integer.MAX_VALUE) - .build() - - val fetchRequestBB = TestUtils.createRequestByteBuffer(bigFetch) - - // send the request - apis.handleFetchRequest(new RequestChannel.Request(processor=0, requestKey=5, buffer=fetchRequestBB, startTimeMs=1)) - - /** - * Make sure the log satisfies the fetch from a follower by reading data beyond the HW, mainly all bytes after - * an offset of 15 - */ - EasyMock.verify(log) - } + // create the replica manager + replicaManager = new ReplicaManager(configs.head, time, zkClient, scheduler, logManager, new AtomicBoolean(false)) - private def getPartitionWithAllReplicasInISR(topic: String, partitionId: Int, time: Time, leaderId: Int, - localLog: Log, leaderHW: Long, replicaManager: ReplicaManager): Partition = { - val partition = new Partition(topic, partitionId, 2, time, replicaManager) - val leaderReplica = new Replica(leaderId, partition, time, 0, Some(localLog)) + // add the partition with two replicas, both in ISR + val partition = replicaManager.getOrCreatePartition(topic, partitionId) - val allReplicas = getFollowerReplicas(partition, leaderId, time) :+ leaderReplica + // create the leader replica with the local log + val leaderReplica = new Replica(configs(0).brokerId, partition, time, 0, Some(log)) + leaderReplica.highWatermark = new LogOffsetMetadata(partitionHW) + partition.leaderReplicaIdOpt = Some(leaderReplica.brokerId) + + // create the follower replica with defined log end offset + val followerReplica= new Replica(configs(1).brokerId, partition, time) + followerReplica.logEndOffset = new LogOffsetMetadata(followerLEO, 0L, followerLEO.toInt) + + // add both of them to ISR + val allReplicas = List(leaderReplica, followerReplica) allReplicas.foreach(partition.addReplicaIfNotExists(_)) - // set in sync replicas for this partition to all the assigned replicas partition.inSyncReplicas = allReplicas.toSet - // set the leader and its hw and the hw update time - partition.leaderReplicaIdOpt = Some(leaderId) - leaderReplica.highWatermark = leaderHW - partition } - private def getFollowerReplicas(partition: Partition, leaderId: Int, time: Time): Seq[Replica] = { - configs.filter(_.brokerId != leaderId).map { config => - new Replica(config.brokerId, partition, time) - } + override def tearDown() { + replicaManager.shutdown(false) + super.tearDown() } + /** + * The scenario for this test is that there is one topic that has one partition + * with one leader replica on broker "0" and one follower replica on broker "1" + * inside the replica manager's metadata. + * + * The leader replica on "0" has HW of "5" and LEO of "20". The follower on + * broker "1" has a local replica with a HW matching the leader's ("5") and + * LEO of "15", meaning it's not in-sync but is still in ISR (hasn't yet expired from ISR). 
+ * + * When a fetch operation with read committed data turned on is received, the replica manager + * should only return data up to the HW of the partition; when a fetch operation with read + * committed data turned off is received, the replica manager could return data up to the LEO + * of the local leader replica's log. + */ + def testReadFromLog() { + + assertEquals("Reading committed data should return messages only up to high watermark", messagesToHW, + replicaManager.readFromLocalLog(true, true, fetchInfo).get(topicAndPartition).get.info.messageSet.head.message) + + assertEquals("Reading any data can return messages up to the end of the log", messagesToLEO, + replicaManager.readFromLocalLog(true, false, fetchInfo).get(topicAndPartition).get.info.messageSet.head.message) + } } diff --git a/core/src/test/scala/unit/kafka/utils/ByteBoundedBlockingQueueTest.scala b/core/src/test/scala/unit/kafka/utils/ByteBoundedBlockingQueueTest.scala new file mode 100644 index 0000000000000..fe8d2aeb642bc --- /dev/null +++ b/core/src/test/scala/unit/kafka/utils/ByteBoundedBlockingQueueTest.scala @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
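The rewritten SimpleFetchTest reduces to one rule: a read that must only expose committed data is capped at the partition high watermark, while a follower read may go up to the leader's log end offset. A toy model of that bound, kept deliberately independent of the real ReplicaManager API:

    object FetchBoundSketch {
      // Exclusive upper bound for a read given HW, LEO and the committed-only flag.
      def maxReadableOffset(highWatermark: Long, logEndOffset: Long, readOnlyCommitted: Boolean): Long =
        if (readOnlyCommitted) math.min(highWatermark, logEndOffset) else logEndOffset

      def main(args: Array[String]) {
        val (hw, leo) = (5L, 20L)
        println(maxReadableOffset(hw, leo, readOnlyCommitted = true))   // 5  (ordinary consumer)
        println(maxReadableOffset(hw, leo, readOnlyCommitted = false))  // 20 (follower replica)
      }
    }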
+ */ + +package unit.kafka.utils + +import java.util.concurrent.TimeUnit + +import junit.framework.Assert._ +import org.junit.{Test} +import kafka.utils.ByteBoundedBlockingQueue + +class ByteBoundedBlockingQueueTest { + val sizeFunction = (a: String) => a.length + val queue = new ByteBoundedBlockingQueue[String](5, 15, Some(sizeFunction)) + + @Test + def testByteBoundedBlockingQueue() { + assertEquals(5, queue.remainingSize) + assertEquals(15, queue.remainingByteSize) + + //offer a message whose size is smaller than remaining capacity + val m0 = new String("0123456789") + assertEquals(true, queue.offer(m0)) + assertEquals(1, queue.size()) + assertEquals(10, queue.byteSize()) + assertEquals(4, queue.remainingSize) + assertEquals(5, queue.remainingByteSize) + + // offer a message where remaining capacity < message size < capacity limit + val m1 = new String("1234567890") + assertEquals(true, queue.offer(m1)) + assertEquals(2, queue.size()) + assertEquals(20, queue.byteSize()) + assertEquals(3, queue.remainingSize) + assertEquals(0, queue.remainingByteSize) + + // offer a message using timeout, should fail because no space is left + val m2 = new String("2345678901") + assertEquals(false, queue.offer(m2, 10, TimeUnit.MILLISECONDS)) + assertEquals(2, queue.size()) + assertEquals(20, queue.byteSize()) + assertEquals(3, queue.remainingSize) + assertEquals(0, queue.remainingByteSize) + + // take an element out of the queue + assertEquals("0123456789", queue.take()) + assertEquals(1, queue.size()) + assertEquals(10, queue.byteSize()) + assertEquals(4, queue.remainingSize) + assertEquals(5, queue.remainingByteSize) + + // add 5 small elements into the queue, first 4 should succeed, the 5th one should fail + // test put() + assertEquals(true, queue.put("a")) + assertEquals(true, queue.offer("b")) + assertEquals(true, queue.offer("c")) + assertEquals(4, queue.size()) + assertEquals(13, queue.byteSize()) + assertEquals(1, queue.remainingSize) + assertEquals(2, queue.remainingByteSize) + + assertEquals(true, queue.offer("d")) + assertEquals(5, queue.size()) + assertEquals(14, queue.byteSize()) + assertEquals(0, queue.remainingSize) + assertEquals(1, queue.remainingByteSize) + + assertEquals(false, queue.offer("e")) + assertEquals(5, queue.size()) + assertEquals(14, queue.byteSize()) + assertEquals(0, queue.remainingSize) + assertEquals(1, queue.remainingByteSize) + + // try take 6 elements out of the queue, the last poll() should fail as there is no element anymore + // test take() + assertEquals("1234567890", queue.poll(10, TimeUnit.MILLISECONDS)) + // test poll + assertEquals("a", queue.poll()) + assertEquals("b", queue.poll()) + assertEquals("c", queue.poll()) + assertEquals("d", queue.poll()) + assertEquals(null, queue.poll(10, TimeUnit.MILLISECONDS)) + } + +} diff --git a/core/src/test/scala/unit/kafka/utils/CommandLineUtilsTest.scala b/core/src/test/scala/unit/kafka/utils/CommandLineUtilsTest.scala new file mode 100644 index 0000000000000..e832a0179c721 --- /dev/null +++ b/core/src/test/scala/unit/kafka/utils/CommandLineUtilsTest.scala @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
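ByteBoundedBlockingQueueTest exercises a queue bounded both by element count and by total byte size, where an offer is rejected once the byte budget is exhausted but a single element may overshoot it. A short usage sketch built only from the calls the test exercises; the capacities and strings here are arbitrary, and the boundary behavior follows the assertions above:

    import java.util.concurrent.TimeUnit
    import kafka.utils.ByteBoundedBlockingQueue

    object BoundedQueueSketch {
      def main(args: Array[String]) {
        // At most 3 elements and roughly 12 bytes, sized by string length.
        val queue = new ByteBoundedBlockingQueue[String](3, 12, Some((s: String) => s.length))

        queue.offer("0123456789")                              // accepted, 10 of 12 bytes used
        queue.offer("abcdef")                                  // accepted, may overshoot the byte bound
        println(queue.offer("x", 10, TimeUnit.MILLISECONDS))   // false: no byte budget left

        println(queue.take())                                  // "0123456789"
        println(queue.size() + " elements, " + queue.byteSize() + " bytes")
      }
    }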
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package unit.kafka.utils + +import junit.framework.Assert._ +import org.junit.{Test, After, Before} +import kafka.utils.CommandLineUtils; + +class CommandLineUtilsTest { + + + @Test + def testParseEmptyArg() { + val argArray = Array("my.empty.property=") + val props = CommandLineUtils.parseKeyValueArgs(argArray) + assertEquals("Empty value should be equal to empty string",props.getProperty("my.empty.property"),"") + } + + @Test + def testParseSingleArg() { + val argArray = Array("my.property=value") + val props = CommandLineUtils.parseKeyValueArgs(argArray) + assertEquals("Value of a single property should be 'value' ",props.getProperty("my.property"),"value") + } + + @Test + def testParseArgs() { + val argArray = Array("first.property=first","second.property=second") + val props = CommandLineUtils.parseKeyValueArgs(argArray) + assertEquals("Value of first property should be 'first'",props.getProperty("first.property"),"first") + assertEquals("Value of second property should be 'second'",props.getProperty("second.property"),"second") + } + +} diff --git a/core/src/test/scala/unit/kafka/utils/ReplicationUtilsTest.scala b/core/src/test/scala/unit/kafka/utils/ReplicationUtilsTest.scala new file mode 100644 index 0000000000000..84e08557de5ac --- /dev/null +++ b/core/src/test/scala/unit/kafka/utils/ReplicationUtilsTest.scala @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
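CommandLineUtilsTest pins down parseKeyValueArgs: each argument of the form key=value becomes a property, and an empty value maps to the empty string. A usage sketch under those semantics; the property names are arbitrary:

    import kafka.utils.CommandLineUtils

    object ParseKeyValueArgsSketch {
      def main(args: Array[String]) {
        val props = CommandLineUtils.parseKeyValueArgs(Array("retries=3", "acks=all", "empty.value="))
        println(props.getProperty("retries"))      // "3"
        println(props.getProperty("acks"))         // "all"
        println(props.getProperty("empty.value"))  // "" (empty values are kept)
      }
    }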
+ */ + +package kafka.utils + +import kafka.server.{ReplicaFetcherManager, KafkaConfig} +import kafka.api.LeaderAndIsr +import kafka.zk.ZooKeeperTestHarness +import kafka.common.TopicAndPartition +import org.scalatest.junit.JUnit3Suite +import org.junit.Assert._ +import org.junit.Test +import org.easymock.EasyMock + + +class ReplicationUtilsTest extends JUnit3Suite with ZooKeeperTestHarness { + val topic = "my-topic-test" + val partitionId = 0 + val brokerId = 1 + val leaderEpoch = 1 + val controllerEpoch = 1 + val zkVersion = 1 + val topicPath = "/brokers/topics/my-topic-test/partitions/0/state" + val topicData = Json.encode(Map("controller_epoch" -> 1, "leader" -> 1, + "versions" -> 1, "leader_epoch" -> 1,"isr" -> List(1,2))) + val topicDataVersionMismatch = Json.encode(Map("controller_epoch" -> 1, "leader" -> 1, + "versions" -> 2, "leader_epoch" -> 1,"isr" -> List(1,2))) + val topicDataMismatch = Json.encode(Map("controller_epoch" -> 1, "leader" -> 1, + "versions" -> 2, "leader_epoch" -> 2,"isr" -> List(1,2))) + + + override def setUp() { + super.setUp() + ZkUtils.createPersistentPath(zkClient,topicPath,topicData) + } + + @Test + def testUpdateLeaderAndIsr() { + val configs = TestUtils.createBrokerConfigs(1).map(new KafkaConfig(_)) + val log = EasyMock.createMock(classOf[kafka.log.Log]) + EasyMock.expect(log.logEndOffset).andReturn(20).anyTimes() + EasyMock.expect(log) + EasyMock.replay(log) + + val logManager = EasyMock.createMock(classOf[kafka.log.LogManager]) + EasyMock.expect(logManager.getLog(TopicAndPartition(topic, partitionId))).andReturn(Some(log)).anyTimes() + EasyMock.replay(logManager) + + val replicaManager = EasyMock.createMock(classOf[kafka.server.ReplicaManager]) + EasyMock.expect(replicaManager.config).andReturn(configs.head) + EasyMock.expect(replicaManager.logManager).andReturn(logManager) + EasyMock.expect(replicaManager.replicaFetcherManager).andReturn(EasyMock.createMock(classOf[ReplicaFetcherManager])) + EasyMock.expect(replicaManager.zkClient).andReturn(zkClient) + EasyMock.replay(replicaManager) + + val replicas = List(0,1) + + // regular update + val newLeaderAndIsr1 = new LeaderAndIsr(brokerId, leaderEpoch, replicas, 0) + val (updateSucceeded1,newZkVersion1) = ReplicationUtils.updateLeaderAndIsr(zkClient, + "my-topic-test", partitionId, newLeaderAndIsr1, controllerEpoch, 0) + assertTrue(updateSucceeded1) + assertEquals(newZkVersion1, 1) + + // mismatched zkVersion with the same data + val newLeaderAndIsr2 = new LeaderAndIsr(brokerId, leaderEpoch, replicas, zkVersion + 1) + val (updateSucceeded2,newZkVersion2) = ReplicationUtils.updateLeaderAndIsr(zkClient, + "my-topic-test", partitionId, newLeaderAndIsr2, controllerEpoch, zkVersion + 1) + assertTrue(updateSucceeded2) + // returns true with existing zkVersion + assertEquals(newZkVersion2,1) + + // mismatched zkVersion and leaderEpoch + val newLeaderAndIsr3 = new LeaderAndIsr(brokerId, leaderEpoch + 1, replicas, zkVersion + 1) + val (updateSucceeded3,newZkVersion3) = ReplicationUtils.updateLeaderAndIsr(zkClient, + "my-topic-test", partitionId, newLeaderAndIsr3, controllerEpoch, zkVersion + 1) + assertFalse(updateSucceeded3) + assertEquals(newZkVersion3,-1) + } + +} diff --git a/core/src/test/scala/unit/kafka/utils/TestUtils.scala b/core/src/test/scala/unit/kafka/utils/TestUtils.scala index 500eeca2f95d9..94d0028d8c490 100644 --- a/core/src/test/scala/unit/kafka/utils/TestUtils.scala +++ b/core/src/test/scala/unit/kafka/utils/TestUtils.scala @@ -23,29 +23,36 @@ import java.nio._ import java.nio.channels._ import 
java.util.Random import java.util.Properties -import junit.framework.AssertionFailedError -import junit.framework.Assert._ + +import org.apache.kafka.common.utils.Utils._ + +import collection.mutable.ListBuffer + +import org.I0Itec.zkclient.ZkClient + import kafka.server._ import kafka.producer._ import kafka.message._ -import org.I0Itec.zkclient.ZkClient -import kafka.cluster.Broker -import collection.mutable.ListBuffer -import kafka.consumer.ConsumerConfig -import java.util.concurrent.locks.ReentrantLock -import java.util.concurrent.TimeUnit import kafka.api._ -import collection.mutable.Map +import kafka.cluster.Broker +import kafka.consumer.{KafkaStream, ConsumerConfig} import kafka.serializer.{StringEncoder, DefaultEncoder, Encoder} import kafka.common.TopicAndPartition -import junit.framework.Assert +import kafka.admin.AdminUtils +import kafka.producer.ProducerConfig +import kafka.log._ + +import junit.framework.AssertionFailedError +import junit.framework.Assert._ +import org.apache.kafka.clients.producer.KafkaProducer +import scala.collection.Map /** * Utility functions to help with testing */ object TestUtils extends Logging { - + val IoTmpDir = System.getProperty("java.io.tmpdir") val Letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" @@ -81,6 +88,25 @@ object TestUtils extends Logging { val f = new File(IoTmpDir, "kafka-" + random.nextInt(1000000)) f.mkdirs() f.deleteOnExit() + + Runtime.getRuntime().addShutdownHook(new Thread() { + override def run() = { + Utils.rm(f) + } + }) + + f + } + + def tempTopic(): String = "testTopic" + random.nextInt(1000000) + + /** + * Create a temporary relative directory + */ + def tempRelativeDir(parent: String): File = { + val f = new File(parent, "kafka-" + random.nextInt(1000000)) + f.mkdirs() + f.deleteOnExit() f } @@ -112,19 +138,21 @@ object TestUtils extends Logging { /** * Create a test config for the given node id */ - def createBrokerConfigs(numConfigs: Int): List[Properties] = { + def createBrokerConfigs(numConfigs: Int, + enableControlledShutdown: Boolean = true): List[Properties] = { for((port, node) <- choosePorts(numConfigs).zipWithIndex) - yield createBrokerConfig(node, port) + yield createBrokerConfig(node, port, enableControlledShutdown) } def getBrokerListStrFromConfigs(configs: Seq[KafkaConfig]): String = { - configs.map(c => c.hostName + ":" + c.port).mkString(",") + configs.map(c => formatAddress(c.hostName, c.port)).mkString(",") } /** * Create a test config for the given node id */ - def createBrokerConfig(nodeId: Int, port: Int = choosePort()): Properties = { + def createBrokerConfig(nodeId: Int, port: Int = choosePort(), + enableControlledShutdown: Boolean = true): Properties = { val props = new Properties props.put("broker.id", nodeId.toString) props.put("host.name", "localhost") @@ -132,9 +160,46 @@ object TestUtils extends Logging { props.put("log.dir", TestUtils.tempDir().getAbsolutePath) props.put("zookeeper.connect", TestZKUtils.zookeeperConnect) props.put("replica.socket.timeout.ms", "1500") + props.put("controlled.shutdown.enable", enableControlledShutdown.toString) props } + /** + * Create a topic in zookeeper. + * Wait until the leader is elected and the metadata is propagated to all brokers. + * Return the leader for each partition. 
+ */ + def createTopic(zkClient: ZkClient, + topic: String, + numPartitions: Int = 1, + replicationFactor: Int = 1, + servers: Seq[KafkaServer], + topicConfig: Properties = new Properties) : scala.collection.immutable.Map[Int, Option[Int]] = { + // create topic + AdminUtils.createTopic(zkClient, topic, numPartitions, replicationFactor, topicConfig) + // wait until the update metadata request for new topic reaches all servers + (0 until numPartitions).map { case i => + TestUtils.waitUntilMetadataIsPropagated(servers, topic, i) + i -> TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, i) + }.toMap + } + + /** + * Create a topic in zookeeper using a customized replica assignment. + * Wait until the leader is elected and the metadata is propagated to all brokers. + * Return the leader for each partition. + */ + def createTopic(zkClient: ZkClient, topic: String, partitionReplicaAssignment: collection.Map[Int, Seq[Int]], + servers: Seq[KafkaServer]) : scala.collection.immutable.Map[Int, Option[Int]] = { + // create topic + AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkClient, topic, partitionReplicaAssignment) + // wait until the update metadata request for new topic reaches all servers + partitionReplicaAssignment.keySet.map { case i => + TestUtils.waitUntilMetadataIsPropagated(servers, topic, i) + i -> TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, i) + }.toMap + } + /** * Create a test config for a consumer */ @@ -145,7 +210,7 @@ object TestUtils extends Logging { props.put("group.id", groupId) props.put("consumer.id", consumerId) props.put("consumer.timeout.ms", consumerTimeout.toString) - props.put("zookeeper.session.timeout.ms", "400") + props.put("zookeeper.session.timeout.ms", "6000") props.put("zookeeper.sync.time.ms", "200") props.put("auto.commit.interval.ms", "1000") props.put("rebalance.max.retries", "4") @@ -290,30 +355,62 @@ object TestUtils extends Logging { } /** - * Create a producer for the given host and port + * Create a producer with a few pre-configured properties. + * If certain properties need to be overridden, they can be provided in producerProps. */ - def createProducer[K, V](brokerList: String, - encoder: Encoder[V] = new DefaultEncoder(), - keyEncoder: Encoder[K] = new DefaultEncoder()): Producer[K, V] = { - val props = new Properties() - props.put("metadata.broker.list", brokerList) - props.put("send.buffer.bytes", "65536") - props.put("connect.timeout.ms", "100000") - props.put("reconnect.interval", "10000") - props.put("serializer.class", encoder.getClass.getCanonicalName) - props.put("key.serializer.class", keyEncoder.getClass.getCanonicalName) + def createProducer[K, V](brokerList: String, + encoder: String = classOf[DefaultEncoder].getName, + keyEncoder: String = classOf[DefaultEncoder].getName, + partitioner: String = classOf[DefaultPartitioner].getName, + producerProps: Properties = null): Producer[K, V] = { + val props: Properties = getProducerConfig(brokerList) + + //override any explicitly specified properties + if (producerProps != null) + props.putAll(producerProps) + + props.put("serializer.class", encoder) + props.put("key.serializer.class", keyEncoder) + props.put("partitioner.class", partitioner) new Producer[K, V](new ProducerConfig(props)) } - def getProducerConfig(brokerList: String, partitioner: String = "kafka.producer.DefaultPartitioner"): Properties = { + /** + * Create a (new) producer with a few pre-configured properties. 
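The createTopic helpers added above block until the leader is elected and the metadata has reached every broker, then hand back the leader per partition, so callers no longer need their own wait loops. A sketch of a test step combining them with the reworked createProducer; it assumes a running zkClient, servers and their configs are already in scope, and the topic name is arbitrary:

    import kafka.producer.KeyedMessage
    import kafka.serializer.StringEncoder
    import kafka.server.{KafkaConfig, KafkaServer}
    import kafka.utils.TestUtils
    import org.I0Itec.zkclient.ZkClient

    object CreateTopicSketch {
      def createAndProduce(zkClient: ZkClient, servers: Seq[KafkaServer], configs: Seq[KafkaConfig]) {
        // Returns the elected leader for each of the two partitions.
        val leaders = TestUtils.createTopic(zkClient, "example-topic",
          numPartitions = 2, replicationFactor = 1, servers = servers)
        assert(leaders.values.forall(_.isDefined), "every partition should have a leader")

        val producer = TestUtils.createProducer[String, String](
          TestUtils.getBrokerListStrFromConfigs(configs),
          encoder = classOf[StringEncoder].getName,
          keyEncoder = classOf[StringEncoder].getName)
        producer.send(new KeyedMessage[String, String]("example-topic", "key", "value"))
        producer.close()
      }
    }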
+ */ + def createNewProducer(brokerList: String, + acks: Int = -1, + metadataFetchTimeout: Long = 3000L, + blockOnBufferFull: Boolean = true, + bufferSize: Long = 1024L * 1024L, + retries: Int = 0) : KafkaProducer[Array[Byte],Array[Byte]] = { + import org.apache.kafka.clients.producer.ProducerConfig + + val producerProps = new Properties() + producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList) + producerProps.put(ProducerConfig.ACKS_CONFIG, acks.toString) + producerProps.put(ProducerConfig.METADATA_FETCH_TIMEOUT_CONFIG, metadataFetchTimeout.toString) + producerProps.put(ProducerConfig.BLOCK_ON_BUFFER_FULL_CONFIG, blockOnBufferFull.toString) + producerProps.put(ProducerConfig.BUFFER_MEMORY_CONFIG, bufferSize.toString) + producerProps.put(ProducerConfig.RETRIES_CONFIG, retries.toString) + producerProps.put(ProducerConfig.RETRY_BACKOFF_MS_CONFIG, "100") + producerProps.put(ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG, "200") + return new KafkaProducer[Array[Byte],Array[Byte]](producerProps) + } + + /** + * Create a default producer config properties map with the given metadata broker list + */ + def getProducerConfig(brokerList: String): Properties = { val props = new Properties() props.put("metadata.broker.list", brokerList) - props.put("partitioner.class", partitioner) - props.put("message.send.max.retries", "3") + props.put("message.send.max.retries", "5") props.put("retry.backoff.ms", "1000") - props.put("request.timeout.ms", "500") + props.put("request.timeout.ms", "2000") props.put("request.required.acks", "-1") - props.put("serializer.class", classOf[StringEncoder].getName.toString) + props.put("send.buffer.bytes", "65536") + props.put("connect.timeout.ms", "100000") + props.put("reconnect.interval", "10000") props } @@ -324,7 +421,7 @@ object TestUtils extends Logging { props.put("port", port.toString) props.put("request.timeout.ms", "500") props.put("request.required.acks", "1") - props.put("serializer.class", classOf[StringEncoder].getName.toString) + props.put("serializer.class", classOf[StringEncoder].getName) props } @@ -367,9 +464,9 @@ object TestUtils extends Logging { /** * Create a wired format request based on simple basic information */ - def produceRequest(topic: String, - partition: Int, - message: ByteBufferMessageSet, + def produceRequest(topic: String, + partition: Int, + message: ByteBufferMessageSet, acks: Int = SyncProducerConfig.DefaultRequiredAcks, timeout: Int = SyncProducerConfig.DefaultAckTimeoutMs, correlationId: Int = 0, @@ -377,17 +474,17 @@ object TestUtils extends Logging { produceRequestWithAcks(Seq(topic), Seq(partition), message, acks, timeout, correlationId, clientId) } - def produceRequestWithAcks(topics: Seq[String], - partitions: Seq[Int], - message: ByteBufferMessageSet, - acks: Int = SyncProducerConfig.DefaultRequiredAcks, + def produceRequestWithAcks(topics: Seq[String], + partitions: Seq[Int], + message: ByteBufferMessageSet, + acks: Int = SyncProducerConfig.DefaultRequiredAcks, timeout: Int = SyncProducerConfig.DefaultAckTimeoutMs, correlationId: Int = 0, clientId: String = SyncProducerConfig.DefaultClientId): ProducerRequest = { val data = topics.flatMap(topic => partitions.map(partition => (TopicAndPartition(topic, partition), message)) ) - new ProducerRequest(correlationId, clientId, acks.toShort, timeout, Map(data:_*)) + new ProducerRequest(correlationId, clientId, acks.toShort, timeout, collection.mutable.Map(data:_*)) } def makeLeaderForPartition(zkClient: ZkClient, topic: String, @@ -418,36 +515,51 @@ object TestUtils 
extends Logging { } } - def waitUntilLeaderIsElectedOrChanged(zkClient: ZkClient, topic: String, partition: Int, timeoutMs: Long, oldLeaderOpt: Option[Int] = None): Option[Int] = { - val leaderLock = new ReentrantLock() - val leaderExistsOrChanged = leaderLock.newCondition() + /** + * If neither oldLeaderOpt nor newLeaderOpt is defined, wait until the leader of a partition is elected. + * If oldLeaderOpt is defined, it waits until the new leader is different from the old leader. + * If newLeaderOpt is defined, it waits until the new leader becomes the expected new leader. + * @return The new leader or assertion failure if timeout is reached. + */ + def waitUntilLeaderIsElectedOrChanged(zkClient: ZkClient, topic: String, partition: Int, timeoutMs: Long = 5000L, + oldLeaderOpt: Option[Int] = None, newLeaderOpt: Option[Int] = None): Option[Int] = { + require(!(oldLeaderOpt.isDefined && newLeaderOpt.isDefined), "Can't define both the old and the new leader") + val startTime = System.currentTimeMillis() + var isLeaderElectedOrChanged = false - if(oldLeaderOpt == None) - info("Waiting for leader to be elected for partition [%s,%d]".format(topic, partition)) - else - info("Waiting for leader for partition [%s,%d] to be changed from old leader %d".format(topic, partition, oldLeaderOpt.get)) + trace("Waiting for leader to be elected or changed for partition [%s,%d], older leader is %s, new leader is %s" + .format(topic, partition, oldLeaderOpt, newLeaderOpt)) - leaderLock.lock() - try { - zkClient.subscribeDataChanges(ZkUtils.getTopicPartitionLeaderAndIsrPath(topic, partition), new LeaderExistsOrChangedListener(topic, partition, leaderLock, leaderExistsOrChanged, oldLeaderOpt, zkClient)) - leaderExistsOrChanged.await(timeoutMs, TimeUnit.MILLISECONDS) + var leader: Option[Int] = None + while (!isLeaderElectedOrChanged && System.currentTimeMillis() < startTime + timeoutMs) { // check if leader is elected - val leader = ZkUtils.getLeaderForPartition(zkClient, topic, partition) + leader = ZkUtils.getLeaderForPartition(zkClient, topic, partition) leader match { case Some(l) => - if(oldLeaderOpt == None) - info("Leader %d is elected for partition [%s,%d]".format(l, topic, partition)) - else - info("Leader for partition [%s,%d] is changed from %d to %d".format(topic, partition, oldLeaderOpt.get, l)) - case None => error("Timing out after %d ms since leader is not elected for partition [%s,%d]" - .format(timeoutMs, topic, partition)) + if (newLeaderOpt.isDefined && newLeaderOpt.get == l) { + trace("Expected new leader %d is elected for partition [%s,%d]".format(l, topic, partition)) + isLeaderElectedOrChanged = true + } else if (oldLeaderOpt.isDefined && oldLeaderOpt.get != l) { + trace("Leader for partition [%s,%d] is changed from %d to %d".format(topic, partition, oldLeaderOpt.get, l)) + isLeaderElectedOrChanged = true + } else if (!oldLeaderOpt.isDefined) { + trace("Leader %d is elected for partition [%s,%d]".format(l, topic, partition)) + isLeaderElectedOrChanged = true + } else { + trace("Current leader for partition [%s,%d] is %d".format(topic, partition, l)) + } + case None => + trace("Leader for partition [%s,%d] is not elected yet".format(topic, partition)) } - leader - } finally { - leaderLock.unlock() + Thread.sleep(timeoutMs.min(100L)) } + if (!isLeaderElectedOrChanged) + fail("Timing out after %d ms since leader is not elected or changed for partition [%s,%d]" + .format(timeoutMs, topic, partition)) + + return leader } - + /** * Execute the given block. If it throws an assert error, retry. 
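waitUntilLeaderIsElectedOrChanged now polls ZooKeeper instead of registering a data-change listener, and its contract is spelled out in the new doc comment: with no hints it waits for any leader, with oldLeaderOpt it waits for a different leader, and with newLeaderOpt it waits for a specific one. A usage sketch under those semantics; the object and method names are illustrative:

    import kafka.utils.TestUtils
    import org.I0Itec.zkclient.ZkClient

    object LeaderWaitSketch {
      // Wait for the initial leader of a partition, then wait for leadership to move.
      def waitForLeaderChange(zkClient: ZkClient, topic: String, partition: Int): Option[Int] = {
        // No hints: block until some leader is elected, or fail after the default timeout.
        val initialLeader = TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, partition)
        // oldLeaderOpt given: block until the leader differs from the initial one.
        TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, partition,
          oldLeaderOpt = initialLeader)
      }
    }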
Repeat * until no error is thrown or the time limit ellapses @@ -461,7 +573,7 @@ object TestUtils extends Logging { return } catch { case e: AssertionFailedError => - val ellapsed = System.currentTimeMillis - startTime + val ellapsed = System.currentTimeMillis - startTime if(ellapsed > maxWaitMs) { throw e } else { @@ -474,15 +586,15 @@ object TestUtils extends Logging { } /** - * Wait until the given condition is true or the given wait time ellapses + * Wait until the given condition is true or throw an exception if the given wait time elapses. */ - def waitUntilTrue(condition: () => Boolean, waitTime: Long): Boolean = { + def waitUntilTrue(condition: () => Boolean, msg: String, waitTime: Long = 5000L): Boolean = { val startTime = System.currentTimeMillis() while (true) { if (condition()) return true if (System.currentTimeMillis() > startTime + waitTime) - return false + fail(msg) Thread.sleep(waitTime.min(100L)) } // should never hit here @@ -510,12 +622,35 @@ object TestUtils extends Logging { byteBuffer } - def waitUntilMetadataIsPropagated(servers: Seq[KafkaServer], topic: String, partition: Int, timeout: Long) = { - Assert.assertTrue("Partition [%s,%d] metadata not propagated after timeout".format(topic, partition), - TestUtils.waitUntilTrue(() => - servers.foldLeft(true)(_ && _.apis.metadataCache.keySet.contains(TopicAndPartition(topic, partition))), timeout)) + + /** + * Wait until a valid leader is propagated to the metadata cache in each broker. + * It assumes that the leader propagated to each broker is the same. + * @param servers The list of servers that the metadata should reach to + * @param topic The topic name + * @param partition The partition Id + * @param timeout The amount of time waiting on this condition before assert to fail + * @return The leader of the partition. 
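waitUntilTrue has also changed shape: instead of returning false on timeout it now fails the test with the supplied message, so callers no longer wrap it in assertTrue. A minimal sketch of the new calling convention; the condition here is a stand-in:

    import java.util.concurrent.atomic.AtomicInteger
    import kafka.utils.TestUtils

    object WaitUntilTrueSketch {
      def main(args: Array[String]) {
        val attempts = new AtomicInteger(0)
        // Polls the condition; on timeout it fails with the message instead of returning false.
        TestUtils.waitUntilTrue(() => attempts.incrementAndGet() > 3,
          "condition never became true", waitTime = 1000L)
        println("condition satisfied after " + attempts.get + " attempts")
      }
    }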
+ */ + def waitUntilMetadataIsPropagated(servers: Seq[KafkaServer], topic: String, partition: Int, timeout: Long = 5000L): Int = { + var leader: Int = -1 + TestUtils.waitUntilTrue(() => + servers.foldLeft(true) { + (result, server) => + val partitionStateOpt = server.apis.metadataCache.getPartitionInfo(topic, partition) + partitionStateOpt match { + case None => false + case Some(partitionState) => + leader = partitionState.leaderIsrAndControllerEpoch.leaderAndIsr.leader + result && Request.isValidBrokerId(leader) + } + }, + "Partition [%s,%d] metadata not propagated after %d ms".format(topic, partition, timeout), + waitTime = timeout) + + leader } - + def writeNonsenseToFile(fileName: File, position: Long, size: Int) { val file = new RandomAccessFile(fileName, "rw") file.seek(position) @@ -523,7 +658,7 @@ object TestUtils extends Logging { file.writeByte(random.nextInt(255)) file.close() } - + def appendNonsenseToFile(fileName: File, size: Int) { val file = new FileOutputStream(fileName, true) for(i <- 0 until size) @@ -541,19 +676,118 @@ object TestUtils extends Logging { def ensureNoUnderReplicatedPartitions(zkClient: ZkClient, topic: String, partitionToBeReassigned: Int, assignedReplicas: Seq[Int], servers: Seq[KafkaServer]) { - val inSyncReplicas = ZkUtils.getInSyncReplicasForPartition(zkClient, topic, partitionToBeReassigned) - assertFalse("Reassigned partition [%s,%d] is underreplicated".format(topic, partitionToBeReassigned), - inSyncReplicas.size < assignedReplicas.size) - val leader = ZkUtils.getLeaderForPartition(zkClient, topic, partitionToBeReassigned) - assertTrue("Reassigned partition [%s,%d] is unavailable".format(topic, partitionToBeReassigned), leader.isDefined) - val leaderBroker = servers.filter(s => s.config.brokerId == leader.get).head - assertTrue("Reassigned partition [%s,%d] is underreplicated as reported by the leader %d".format(topic, partitionToBeReassigned, leader.get), - leaderBroker.replicaManager.underReplicatedPartitionCount() == 0) + TestUtils.waitUntilTrue(() => { + val inSyncReplicas = ZkUtils.getInSyncReplicasForPartition(zkClient, topic, partitionToBeReassigned) + inSyncReplicas.size == assignedReplicas.size + }, + "Reassigned partition [%s,%d] is under replicated".format(topic, partitionToBeReassigned)) + var leader: Option[Int] = None + TestUtils.waitUntilTrue(() => { + leader = ZkUtils.getLeaderForPartition(zkClient, topic, partitionToBeReassigned) + leader.isDefined + }, + "Reassigned partition [%s,%d] is unavailable".format(topic, partitionToBeReassigned)) + TestUtils.waitUntilTrue(() => { + val leaderBroker = servers.filter(s => s.config.brokerId == leader.get).head + leaderBroker.replicaManager.underReplicatedPartitionCount() == 0 + }, + "Reassigned partition [%s,%d] is under-replicated as reported by the leader %d".format(topic, partitionToBeReassigned, leader.get)) } def checkIfReassignPartitionPathExists(zkClient: ZkClient): Boolean = { ZkUtils.pathExists(zkClient, ZkUtils.ReassignPartitionsPath) } + + + /** + * Create new LogManager instance with default configuration for testing + */ + def createLogManager( + logDirs: Array[File] = Array.empty[File], + defaultConfig: LogConfig = LogConfig(), + cleanerConfig: CleanerConfig = CleanerConfig(enableCleaner = false), + time: MockTime = new MockTime()) = + { + new LogManager( + logDirs = logDirs, + topicConfigs = Map(), + defaultConfig = defaultConfig, + cleanerConfig = cleanerConfig, + ioThreads = 4, + flushCheckMs = 1000L, + flushCheckpointMs = 10000L, + retentionCheckMs = 1000L, + scheduler = 
time.scheduler, + time = time, + brokerState = new BrokerState()) + } + + def sendMessagesToPartition(configs: Seq[KafkaConfig], + topic: String, + partition: Int, + numMessages: Int, + compression: CompressionCodec = NoCompressionCodec): List[String] = { + val header = "test-%d".format(partition) + val props = new Properties() + props.put("compression.codec", compression.codec.toString) + val producer: Producer[Int, String] = + createProducer(TestUtils.getBrokerListStrFromConfigs(configs), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[IntEncoder].getName, + partitioner = classOf[FixedValuePartitioner].getName, + producerProps = props) + + val ms = 0.until(numMessages).map(x => header + "-" + x) + producer.send(ms.map(m => new KeyedMessage[Int, String](topic, partition, m)):_*) + debug("Sent %d messages for partition [%s,%d]".format(ms.size, topic, partition)) + producer.close() + ms.toList + } + + def sendMessages(configs: Seq[KafkaConfig], + topic: String, + producerId: String, + messagesPerNode: Int, + header: String, + compression: CompressionCodec, + numParts: Int): List[String]= { + var messages: List[String] = Nil + val props = new Properties() + props.put("compression.codec", compression.codec.toString) + props.put("client.id", producerId) + val producer: Producer[Int, String] = + createProducer(brokerList = TestUtils.getBrokerListStrFromConfigs(configs), + encoder = classOf[StringEncoder].getName, + keyEncoder = classOf[IntEncoder].getName, + partitioner = classOf[FixedValuePartitioner].getName, + producerProps = props) + + for (partition <- 0 until numParts) { + val ms = 0.until(messagesPerNode).map(x => header + "-" + partition + "-" + x) + producer.send(ms.map(m => new KeyedMessage[Int, String](topic, partition, m)):_*) + messages ++= ms + debug("Sent %d messages for partition [%s,%d]".format(ms.size, topic, partition)) + } + producer.close() + messages + } + + def getMessages(nMessagesPerThread: Int, + topicMessageStreams: Map[String, List[KafkaStream[String, String]]]): List[String] = { + var messages: List[String] = Nil + for ((topic, messageStreams) <- topicMessageStreams) { + for (messageStream <- messageStreams) { + val iterator = messageStream.iterator + for (i <- 0 until nMessagesPerThread) { + assertTrue(iterator.hasNext) + val message = iterator.next.message + messages ::= message + debug("received message: " + message) + } + } + } + messages.reverse + } } object TestZKUtils { diff --git a/core/src/test/scala/unit/kafka/utils/UtilsTest.scala b/core/src/test/scala/unit/kafka/utils/UtilsTest.scala index 920f318c85a74..066553cad290c 100644 --- a/core/src/test/scala/unit/kafka/utils/UtilsTest.scala +++ b/core/src/test/scala/unit/kafka/utils/UtilsTest.scala @@ -20,7 +20,6 @@ package kafka.utils import java.util.Arrays import java.util.concurrent.locks.ReentrantLock import java.nio.ByteBuffer -import java.io._ import org.apache.log4j.Logger import org.scalatest.junit.JUnitSuite import org.junit.Assert._ @@ -64,7 +63,16 @@ class UtilsTest extends JUnitSuite { assertTrue(Arrays.equals(bytes, Utils.readBytes(ByteBuffer.wrap(bytes)))) } } - + + @Test + def testAbs() { + assertEquals(0, Utils.abs(Integer.MIN_VALUE)) + assertEquals(1, Utils.abs(-1)) + assertEquals(0, Utils.abs(0)) + assertEquals(1, Utils.abs(1)) + assertEquals(Integer.MAX_VALUE, Utils.abs(Integer.MAX_VALUE)) + } + @Test def testReplaceSuffix() { assertEquals("blah.foo.text", Utils.replaceSuffix("blah.foo.txt", ".txt", ".text")) @@ -72,7 +80,7 @@ class UtilsTest extends JUnitSuite { 
assertEquals("txt.txt", Utils.replaceSuffix("txt.txt.txt", ".txt", "")) assertEquals("foo.txt", Utils.replaceSuffix("foo", "", ".txt")) } - + @Test def testReadInt() { val values = Array(0, 1, -1, Byte.MaxValue, Short.MaxValue, 2 * Short.MaxValue, Int.MaxValue/2, Int.MinValue/2, Int.MaxValue, Int.MinValue, Int.MaxValue) @@ -81,7 +89,6 @@ class UtilsTest extends JUnitSuite { buffer.putInt(i*4, values(i)) assertEquals("Written value should match read value.", values(i), Utils.readInt(buffer.array, i*4)) } - } @Test @@ -96,7 +103,43 @@ class UtilsTest extends JUnitSuite { assertTrue(emptyStringList.equals(emptyListFromNullString)) assertTrue(emptyStringList.equals(emptyList)) } - + + @Test + def testCsvMap() { + val emptyString: String = "" + val emptyMap = Utils.parseCsvMap(emptyString) + val emptyStringMap = Map.empty[String, String] + assertTrue(emptyMap != null) + assertTrue(emptyStringMap.equals(emptyStringMap)) + + val kvPairsIpV6: String = "a:b:c:v,a:b:c:v" + val ipv6Map = Utils.parseCsvMap(kvPairsIpV6) + for (m <- ipv6Map) { + assertTrue(m._1.equals("a:b:c")) + assertTrue(m._2.equals("v")) + } + + val singleEntry:String = "key:value" + val singleMap = Utils.parseCsvMap(singleEntry) + val value = singleMap.getOrElse("key", 0) + assertTrue(value.equals("value")) + + val kvPairsIpV4: String = "192.168.2.1/30:allow, 192.168.2.1/30:allow" + val ipv4Map = Utils.parseCsvMap(kvPairsIpV4) + for (m <- ipv4Map) { + assertTrue(m._1.equals("192.168.2.1/30")) + assertTrue(m._2.equals("allow")) + } + + val kvPairsSpaces: String = "key:value , key: value" + val spaceMap = Utils.parseCsvMap(kvPairsSpaces) + for (m <- spaceMap) { + assertTrue(m._1.equals("key")) + assertTrue(m._2.equals("value")) + } + } + + @Test def testInLock() { val lock = new ReentrantLock() @@ -106,6 +149,5 @@ class UtilsTest extends JUnitSuite { } assertEquals(2, result) assertFalse("Should be unlocked", lock.isLocked) - } } diff --git a/core/src/test/scala/unit/kafka/zk/EmbeddedZookeeper.scala b/core/src/test/scala/unit/kafka/zk/EmbeddedZookeeper.scala index d883bdeee1f58..3151561508938 100644 --- a/core/src/test/scala/unit/kafka/zk/EmbeddedZookeeper.scala +++ b/core/src/test/scala/unit/kafka/zk/EmbeddedZookeeper.scala @@ -18,18 +18,19 @@ package kafka.zk import org.apache.zookeeper.server.ZooKeeperServer -import org.apache.zookeeper.server.NIOServerCnxn +import org.apache.zookeeper.server.NIOServerCnxnFactory import kafka.utils.TestUtils import java.net.InetSocketAddress import kafka.utils.Utils +import org.apache.kafka.common.utils.Utils.getPort class EmbeddedZookeeper(val connectString: String) { val snapshotDir = TestUtils.tempDir() val logDir = TestUtils.tempDir() val tickTime = 500 val zookeeper = new ZooKeeperServer(snapshotDir, logDir, tickTime) - val port = connectString.split(":")(1).toInt - val factory = new NIOServerCnxn.Factory(new InetSocketAddress("127.0.0.1", port)) + val factory = new NIOServerCnxnFactory() + factory.configure(new InetSocketAddress("127.0.0.1", getPort(connectString)), 0) factory.startup(zookeeper) def shutdown() { diff --git a/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala b/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala index 4e25b926d32e4..67d9c4bab270c 100644 --- a/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala +++ b/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala @@ -29,15 +29,15 @@ trait ZooKeeperTestHarness extends JUnit3Suite { val zkSessionTimeout = 6000 override def setUp() { + super.setUp zookeeper = new 
EmbeddedZookeeper(zkConnect) zkClient = new ZkClient(zookeeper.connectString, zkSessionTimeout, zkConnectionTimeout, ZKStringSerializer) - super.setUp } override def tearDown() { - super.tearDown Utils.swallow(zkClient.close()) Utils.swallow(zookeeper.shutdown()) + super.tearDown } } diff --git a/doap_Kafka.rdf b/doap_Kafka.rdf new file mode 100644 index 0000000000000..5381e9f8ce4c2 --- /dev/null +++ b/doap_Kafka.rdf @@ -0,0 +1,57 @@ + + + + + + 2014-04-12 + + Apache Kafka + + + Apache Kafka is a distributed, fault tolerant, publish-subscribe messaging. + A single Kafka broker can handle hundreds of megabytes of reads and writes per second from thousands of clients. Kafka is designed to allow a single cluster to serve as the central data backbone for a large organization. It can be elastically and transparently expanded without downtime. Data streams are partitioned and spread over a cluster of machines to allow data streams larger than the capability of any single machine and to allow clusters of co-ordinated consumers. Kafka has a modern cluster-centric design that offers strong durability and fault-tolerance guarantees. Messages are persisted on disk and replicated within the cluster to prevent data loss. Each broker can handle terabytes of messages without performance impact. + + + + Scala + + + + Kafka 0.8.1 + 2014-03-12 + 0.8.1 + + + + + + + + + + + Jun Rao + + + + + diff --git a/examples/README b/examples/README index 61de2868de29e..53db6969b2e2d 100644 --- a/examples/README +++ b/examples/README @@ -1,17 +1,6 @@ This directory contains examples of client code that uses kafka. -The default target for ant is kafka.examples.KafkaConsumerProducerDemo which sends and receives -messages from Kafka server. - -In order to run demo from SBT: - 1. Start Zookeeper and the Kafka server - 2. ./sbt from top-level kafka directory - 3. Switch to the kafka java examples project -> project kafka-examples - 4. execute run -> run - 5. For simple consumer demo, select option 1 - For unlimited producer-consumer run, select option 2 - -To run the demo using scripts: +To run the demo: 1. Start Zookeeper and the Kafka server 2. For simple consumer demo, run bin/java-simple-consumer-demo.sh diff --git a/examples/bin/java-producer-consumer-demo.sh b/examples/bin/java-producer-consumer-demo.sh index 29e01c2dcf823..fd25e5955397e 100755 --- a/examples/bin/java-producer-consumer-demo.sh +++ b/examples/bin/java-producer-consumer-demo.sh @@ -16,44 +16,7 @@ base_dir=$(dirname $0)/../.. 
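The ZooKeeperTestHarness change above now calls super.setUp before creating the embedded ZooKeeper and zkClient, and shuts both down before super.tearDown, so the client only exists while the server is running. A minimal hypothetical test built on the harness (class and path names are illustrative, not part of this patch) might look like:

    import org.scalatest.junit.JUnit3Suite
    import junit.framework.Assert._
    import kafka.zk.ZooKeeperTestHarness
    import kafka.utils.ZkUtils

    class ZkSmokeTest extends JUnit3Suite with ZooKeeperTestHarness {
      // zkClient is provided by the harness and is only valid between setUp and tearDown
      def testCreateAndCheckPath() {
        ZkUtils.createPersistentPath(zkClient, "/sketch/path", "data")
        assertTrue(ZkUtils.pathExists(zkClient, "/sketch/path"))
      }
    }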
-for file in $base_dir/project/boot/scala-2.8.0/lib/*.jar; -do - if [ ${file##*/} != "sbt-launch.jar" ]; then - CLASSPATH=$CLASSPATH:$file - fi -done - -for file in $base_dir/core/lib_managed/scala_2.8.0/compile/*.jar; -do - CLASSPATH=$CLASSPATH:$file -done - -for file in $base_dir/core/lib/*.jar; -do - CLASSPATH=$CLASSPATH:$file -done - -for file in $base_dir/core/target/scala_2.8.0/*.jar; -do - CLASSPATH=$CLASSPATH:$file -done - -for file in $base_dir/examples/target/scala_2.8.0/*.jar; -do - CLASSPATH=$CLASSPATH:$file -done - -echo $CLASSPATH - -if [ -z "$KAFKA_PERF_OPTS" ]; then - KAFKA_OPTS="-Xmx512M -server -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=3333 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false" -fi - -if [ -z "$JAVA_HOME" ]; then - JAVA="java" -else - JAVA="$JAVA_HOME/bin/java" +if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then + export KAFKA_HEAP_OPTS="-Xmx512M" fi - -$JAVA $KAFKA_OPTS -cp $CLASSPATH kafka.examples.KafkaConsumerProducerDemo $@ - +exec $base_dir/bin/kafka-run-class.sh kafka.examples.KafkaConsumerProducerDemo $@ diff --git a/examples/bin/java-simple-consumer-demo.sh b/examples/bin/java-simple-consumer-demo.sh index 4716a098c7d40..c4f103e827e69 100755 --- a/examples/bin/java-simple-consumer-demo.sh +++ b/examples/bin/java-simple-consumer-demo.sh @@ -16,44 +16,7 @@ base_dir=$(dirname $0)/../.. -for file in $base_dir/project/boot/scala-2.8.0/lib/*.jar; -do - if [ ${file##*/} != "sbt-launch.jar" ]; then - CLASSPATH=$CLASSPATH:$file - fi -done - -for file in $base_dir/core/lib_managed/scala_2.8.0/compile/*.jar; -do - CLASSPATH=$CLASSPATH:$file -done - -for file in $base_dir/core/lib/*.jar; -do - CLASSPATH=$CLASSPATH:$file -done - -for file in $base_dir/core/target/scala_2.8.0/*.jar; -do - CLASSPATH=$CLASSPATH:$file -done - -for file in $base_dir/examples/target/scala_2.8.0/*.jar; -do - CLASSPATH=$CLASSPATH:$file -done - -echo $CLASSPATH - -if [ -z "$KAFKA_PERF_OPTS" ]; then - KAFKA_OPTS="-Xmx512M -server -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=3333 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false" -fi - -if [ -z "$JAVA_HOME" ]; then - JAVA="java" -else - JAVA="$JAVA_HOME/bin/java" +if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then + export KAFKA_HEAP_OPTS="-Xmx512M" fi - -$JAVA $KAFKA_OPTS -cp $CLASSPATH kafka.examples.SimpleConsumerDemo $@ - +exec $base_dir/bin/kafka-run-class.sh kafka.examples.SimpleConsumerDemo $@ diff --git a/examples/build.sbt b/examples/build.sbt deleted file mode 100644 index d12d70142f6ec..0000000000000 --- a/examples/build.sbt +++ /dev/null @@ -1,3 +0,0 @@ -name := "kafka-java-examples" - -crossPaths := false diff --git a/gradle.properties b/gradle.properties index ad7a2f0f62930..19150cf2561b3 100644 --- a/gradle.properties +++ b/gradle.properties @@ -14,11 +14,11 @@ # limitations under the License. 
group=org.apache.kafka -version=0.8.1 -scalaVersion=2.8.0 +version=0.8.3-SNAPSHOT +scalaVersion=2.10.4 task=build +org.gradle.jvmargs=-XX:MaxPermSize=512m -Xmx1024m -#mavenUrl=file://localhost/tmp/maven -mavenUrl=http://your.maven.repository -mavenUsername=your.username -mavenPassword=your.password +mavenUrl= +mavenUsername= +mavenPassword= diff --git a/gradle/buildscript.gradle b/gradle/buildscript.gradle index 225e0a82708bc..5e45c06e8bb8b 100644 --- a/gradle/buildscript.gradle +++ b/gradle/buildscript.gradle @@ -8,5 +8,5 @@ repositories { } dependencies { - classpath 'nl.javadude.gradle.plugins:license-gradle-plugin:0.6.1' + classpath 'nl.javadude.gradle.plugins:license-gradle-plugin:0.10.0' } diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar deleted file mode 100644 index a7634b071cb25..0000000000000 Binary files a/gradle/wrapper/gradle-wrapper.jar and /dev/null differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties deleted file mode 100644 index 610282a699afc..0000000000000 --- a/gradle/wrapper/gradle-wrapper.properties +++ /dev/null @@ -1,6 +0,0 @@ -#Thu Jul 11 22:18:11 PDT 2013 -distributionBase=GRADLE_USER_HOME -distributionPath=wrapper/dists -zipStoreBase=GRADLE_USER_HOME -zipStorePath=wrapper/dists -distributionUrl=http\://services.gradle.org/distributions/gradle-1.6-bin.zip diff --git a/gradlew b/gradlew index c312b910b570f..91a7e269e19df 100755 --- a/gradlew +++ b/gradlew @@ -7,7 +7,7 @@ ############################################################################## # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS="-Xmx1024m -Xms256m -XX:MaxPermSize=512m" +DEFAULT_JVM_OPTS="" APP_NAME="Gradle" APP_BASE_NAME=`basename "$0"` diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000000000..aec99730b4e8f --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,90 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS= + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windowz variants + +if not "%OS%" == "Windows_NT" goto win9xME_args +if "%@eval[2+2]" == "4" goto 4NT_args + +:win9xME_args +@rem Slurp the command line arguments. 
+set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* +goto execute + +:4NT_args +@rem Get arguments from the 4NT Shell from JP Software +set CMD_LINE_ARGS=%$ + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/kafka-patch-review.py b/kafka-patch-review.py index dc6664d22a0b1..b7f132f9d210b 100644 --- a/kafka-patch-review.py +++ b/kafka-patch-review.py @@ -2,22 +2,32 @@ import argparse import sys -import os +import os import time import datetime import tempfile +import commands from jira.client import JIRA -def get_jira(): - options = { - 'server': 'https://issues.apache.org/jira' - } +def get_jira_config(): # read the config file home=jira_home=os.getenv('HOME') home=home.rstrip('/') jira_config = dict(line.strip().split('=') for line in open(home + '/jira.ini')) - jira = JIRA(options,basic_auth=(jira_config['user'], jira_config['password'])) - return jira + return jira_config + +def get_jira(): + options = { + 'server': 'https://issues.apache.org/jira' + } + + jira_config = get_jira_config() + jira = JIRA(options=options,basic_auth=(jira_config['user'], jira_config['password'])) + return jira + +def cmd_exists(cmd): + status, result = commands.getstatusoutput(cmd) + return status def main(): ''' main(), shut up, pylint ''' @@ -31,6 +41,15 @@ def main(): popt.add_argument('-db', '--debug', action='store_true', required=False, help='Enable debug mode') opt = popt.parse_args() + post_review_tool = None + if (cmd_exists("post-review") == 0): + post_review_tool = "post-review" + elif (cmd_exists("rbt") == 0): + post_review_tool = "rbt post" + else: + print "please install RBTools" + sys.exit(1) + patch_file=tempfile.gettempdir() + "/" + opt.jira + ".patch" if opt.reviewboard: ts = time.time() @@ -62,9 +81,9 @@ def main(): p=os.popen(git_remote_update) p.close() - rb_command="post-review --publish --tracking-branch " + opt.branch + " --target-groups=kafka --bugs-closed=" + opt.jira + rb_command= post_review_tool + " --publish --tracking-branch " + opt.branch + " --target-groups=kafka --bugs-closed=" + opt.jira if opt.debug: - rb_command=rb_command + " --debug" + rb_command=rb_command + " --debug" summary="Patch for " + opt.jira if opt.summary: summary=opt.summary @@ -91,11 +110,13 @@ def main(): print 'ERROR: Your reviewboard was not created/updated. Please run the script with the --debug option to troubleshoot the problem' p.close() sys.exit(1) - p.close() - if opt.debug: + if p.close() != None: + print 'ERROR: reviewboard update failed. Exiting.' 
+ sys.exit(1) + if opt.debug: print 'rb url=',rb_url - - git_command="git diff " + opt.branch + " > " + patch_file + + git_command="git format-patch " + opt.branch + " --stdout > " + patch_file if opt.debug: print git_command p=os.popen(git_command) @@ -108,16 +129,28 @@ def main(): jira.add_attachment(issue,attachment) attachment.close() - comment="Created reviewboard " + comment="Created reviewboard " if not opt.reviewboard: - print 'Created a new reviewboard ',rb_url, + print 'Created a new reviewboard',rb_url, else: - print 'Updated reviewboard' + print 'Updated reviewboard',rb_url comment="Updated reviewboard " - comment = comment + rb_url + ' against branch ' + opt.branch + comment = comment + rb_url + ' against branch ' + opt.branch jira.add_comment(opt.jira, comment) + #update the JIRA status to PATCH AVAILABLE + transitions = jira.transitions(issue) + transitionsMap ={} + + for t in transitions: + transitionsMap[t['name']] = t['id'] + + jira_config = get_jira_config() + + if('Submit Patch' in transitionsMap): + jira.transition_issue(issue, transitionsMap['Submit Patch'] , assignee={'name': jira_config['user']} ) + + if __name__ == '__main__': sys.exit(main()) - diff --git a/lib/apache-rat-0.8.jar b/lib/apache-rat-0.8.jar deleted file mode 100644 index bdc43726d6369..0000000000000 Binary files a/lib/apache-rat-0.8.jar and /dev/null differ diff --git a/lib/sbt-launch.jar b/lib/sbt-launch.jar deleted file mode 100644 index 06ad8d880592a..0000000000000 Binary files a/lib/sbt-launch.jar and /dev/null differ diff --git a/perf/build.sbt b/perf/build.sbt deleted file mode 100644 index 8aa72f4a13eda..0000000000000 --- a/perf/build.sbt +++ /dev/null @@ -1 +0,0 @@ -name := "kafka-perf" diff --git a/perf/config/log4j.properties b/perf/config/log4j.properties deleted file mode 100644 index 542b7391550b2..0000000000000 --- a/perf/config/log4j.properties +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -log4j.rootLogger=INFO, fileAppender - -log4j.appender.fileAppender=org.apache.log4j.FileAppender -log4j.appender.fileAppender.File=perf.log -log4j.appender.fileAppender.layout=org.apache.log4j.PatternLayout -log4j.appender.fileAppender.layout.ConversionPattern=%m %n - -# Turn on all our debugging info -log4j.logger.kafka=INFO - diff --git a/project/Build.scala b/project/Build.scala deleted file mode 100644 index ddcfc4176e689..0000000000000 --- a/project/Build.scala +++ /dev/null @@ -1,152 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import sbt._ -import Keys._ -import Process._ - -import scala.xml.{Node, Elem} -import scala.xml.transform.{RewriteRule, RuleTransformer} - -object KafkaBuild extends Build { - val buildNumber = SettingKey[String]("build-number", "Build number defaults to $BUILD_NUMBER environment variable") - val releaseName = SettingKey[String]("release-name", "the full name of this release") - val commonSettings = Seq( - organization := "org.apache.kafka", - pomExtra := - - org.apache - apache - 10 - - - - Apache 2 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - -, - scalacOptions ++= Seq("-deprecation", "-unchecked", "-g:none"), - crossScalaVersions := Seq("2.8.0","2.8.2", "2.9.1", "2.9.2", "2.10.1"), - excludeFilter in unmanagedSources <<= scalaVersion(v => if (v.startsWith("2.8")) "*_2.9+.scala" else "*_2.8.scala"), - scalaVersion := "2.8.0", - version := "0.8.1", - publishTo := Some("Apache Maven Repo" at "https://repository.apache.org/service/local/staging/deploy/maven2"), - credentials += Credentials(Path.userHome / ".m2" / ".credentials"), - buildNumber := System.getProperty("build.number", ""), - version <<= (buildNumber, version) { (build, version) => if (build == "") version else version + "+" + build}, - releaseName <<= (name, version, scalaVersion) {(name, version, scalaVersion) => name + "_" + scalaVersion + "-" + version}, - javacOptions in compile ++= Seq("-Xlint:unchecked", "-source", "1.5"), - javacOptions in doc ++= Seq("-source", "1.5"), - parallelExecution in Test := false, // Prevent tests from overrunning each other - publishArtifact in Test := true, - libraryDependencies ++= Seq( - "log4j" % "log4j" % "1.2.15" exclude("javax.jms", "jms"), - "net.sf.jopt-simple" % "jopt-simple" % "3.2", - "org.slf4j" % "slf4j-simple" % "1.6.4" - ), - // The issue is going from log4j 1.2.14 to 1.2.15, the developers added some features which required - // some dependencies on various sun and javax packages. - ivyXML := - - - - - - - - - - , - mappings in packageBin in Compile += file("LICENSE") -> "LICENSE", - mappings in packageBin in Compile += file("NOTICE") -> "NOTICE" - ) - - val hadoopSettings = Seq( - javacOptions in compile ++= Seq("-Xlint:deprecation"), - libraryDependencies ++= Seq( - "org.apache.avro" % "avro" % "1.4.0", - "org.apache.pig" % "pig" % "0.8.0", - "commons-logging" % "commons-logging" % "1.0.4", - "org.codehaus.jackson" % "jackson-core-asl" % "1.5.5", - "org.codehaus.jackson" % "jackson-mapper-asl" % "1.5.5", - "org.apache.hadoop" % "hadoop-core" % "0.20.2" - ), - ivyXML := - - - - - - - - - - - - - - - ) - - - val runRat = TaskKey[Unit]("run-rat-task", "Runs Apache rat on Kafka") - val runRatTask = runRat := { - "bin/run-rat.sh" ! 
- } - - val release = TaskKey[Unit]("release", "Creates a deployable release directory file with dependencies, config, and scripts.") - val releaseTask = release <<= ( packageBin in (core, Compile), dependencyClasspath in (core, Runtime), exportedProducts in Compile, - target, releaseName in core ) map { (packageBin, deps, products, target, releaseName) => - val jarFiles = deps.files.filter(f => !products.files.contains(f) && f.getName.endsWith(".jar")) - val destination = target / "RELEASE" / releaseName - IO.copyFile(packageBin, destination / packageBin.getName) - IO.copyFile(file("LICENSE"), destination / "LICENSE") - IO.copyFile(file("NOTICE"), destination / "NOTICE") - IO.copy(jarFiles.map { f => (f, destination / "libs" / f.getName) }) - IO.copyDirectory(file("config"), destination / "config") - IO.copyDirectory(file("bin"), destination / "bin") - for {file <- (destination / "bin").listFiles} { file.setExecutable(true, true) } - } - - val releaseZip = TaskKey[Unit]("release-zip", "Creates a deployable zip file with dependencies, config, and scripts.") - val releaseZipTask = releaseZip <<= (release, target, releaseName in core) map { (release, target, releaseName) => - val zipPath = target / "RELEASE" / "%s.zip".format(releaseName) - IO.delete(zipPath) - IO.zip((target/"RELEASE" ** releaseName ***) x relativeTo(target/"RELEASE"), zipPath) - } - - val releaseTar = TaskKey[Unit]("release-tar", "Creates a deployable tar.gz file with dependencies, config, and scripts.") - val releaseTarTask = releaseTar <<= ( release, target, releaseName in core) map { (release, target, releaseName) => - Process(Seq("tar", "czf", "%s.tar.gz".format(releaseName), releaseName), target / "RELEASE").! match { - case 0 => () - case n => sys.error("Failed to run native tar application!") - } - } - - lazy val kafka = Project(id = "Kafka", base = file(".")).aggregate(core, examples, contrib, perf).settings((commonSettings ++ - runRatTask ++ releaseTask ++ releaseZipTask ++ releaseTarTask): _*) - lazy val core = Project(id = "core", base = file("core")).settings(commonSettings: _*) - lazy val examples = Project(id = "java-examples", base = file("examples")).settings(commonSettings :_*) dependsOn (core) - lazy val perf = Project(id = "perf", base = file("perf")).settings((Seq(name := "kafka-perf") ++ commonSettings):_*) dependsOn (core) - - lazy val contrib = Project(id = "contrib", base = file("contrib")).aggregate(hadoopProducer, hadoopConsumer).settings(commonSettings :_*) - lazy val hadoopProducer = Project(id = "hadoop-producer", base = file("contrib/hadoop-producer")).settings(hadoopSettings ++ commonSettings: _*) dependsOn (core) - lazy val hadoopConsumer = Project(id = "hadoop-consumer", base = file("contrib/hadoop-consumer")).settings(hadoopSettings ++ commonSettings: _*) dependsOn (core) - lazy val clients = Project(id = "kafka-clients", base = file("clients")) - -} diff --git a/project/build/KafkaProject.scala b/project/build/KafkaProject.scala deleted file mode 100644 index f29c67a47fe89..0000000000000 --- a/project/build/KafkaProject.scala +++ /dev/null @@ -1,251 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import sbt._ -import scala.xml.{Node, Elem} -import scala.xml.transform.{RewriteRule, RuleTransformer} - -class KafkaProject(info: ProjectInfo) extends ParentProject(info) with IdeaProject { - override def managedStyle = ManagedStyle.Maven - val publishTo = "Maven Repo" at "http://maven/content/repositories/repository.snapshots" - Credentials(Path.userHome / ".m2" / ".credentials", log) - - lazy val core = project("core", "core-kafka", new CoreKafkaProject(_)) - lazy val examples = project("examples", "java-examples", new KafkaExamplesProject(_), core) - lazy val contrib = project("contrib", "contrib", new ContribProject(_)) - lazy val perf = project("perf", "perf", new KafkaPerfProject(_)) - - lazy val releaseZipTask = core.packageDistTask - - val releaseZipDescription = "Compiles every sub project, runs unit tests, creates a deployable release zip file with dependencies, config, and scripts." - lazy val releaseZip = releaseZipTask dependsOn(core.corePackageAction, core.test, examples.examplesPackageAction, - contrib.producerPackageAction, contrib.consumerPackageAction) describedAs releaseZipDescription - - val runRatDescription = "Runs Apache rat on Kafka" - lazy val runRatTask = task { - Runtime.getRuntime().exec("bin/run-rat.sh") - None - } describedAs runRatDescription - - val rat = "org.apache.rat" % "apache-rat" % "0.8" - - class CoreKafkaProject(info: ProjectInfo) extends DefaultProject(info) - with IdeaProject with CoreDependencies with TestDependencies with CompressionDependencies { - val corePackageAction = packageAllAction - - //The issue is going from log4j 1.2.14 to 1.2.15, the developers added some features which required - // some dependencies on various sun and javax packages. - override def ivyXML = - - - - - - - - - - - - - override def organization = "org.apache" - override def filterScalaJars = false - - // build the executable jar's classpath. - // (why is it necessary to explicitly remove the target/{classes,resources} paths? hm.) - def dependentJars = { - val jars = - publicClasspath +++ mainDependencies.scalaJars --- mainCompilePath --- mainResourcesOutputPath - if (jars.get.find { jar => jar.name.startsWith("scala-library-") }.isDefined) { - // workaround bug in sbt: if the compiler is explicitly included, don't include 2 versions - // of the library. 
- jars --- jars.filter { jar => - jar.absolutePath.contains("/boot/") && jar.name == "scala-library.jar" - } - } else { - jars - } - } - - def dependentJarNames = dependentJars.getFiles.map(_.getName).filter(_.endsWith(".jar")) - override def manifestClassPath = Some(dependentJarNames.map { "libs/" + _ }.mkString(" ")) - - def distName = (artifactID + "-" + projectVersion.value) - def distPath = "dist" / distName ## - - def configPath = "config" ## - def configOutputPath = distPath / "config" - - def binPath = "bin" ## - def binOutputPath = distPath / "bin" - - def distZipName = { - "%s-%s.zip".format(artifactID, projectVersion.value) - } - - lazy val packageDistTask = task { - distPath.asFile.mkdirs() - (distPath / "libs").asFile.mkdirs() - binOutputPath.asFile.mkdirs() - configOutputPath.asFile.mkdirs() - - FileUtilities.copyFlat(List(jarPath), distPath, log).left.toOption orElse - FileUtilities.copyFlat(dependentJars.get, distPath / "libs", log).left.toOption orElse - FileUtilities.copy((configPath ***).get, configOutputPath, log).left.toOption orElse - FileUtilities.copy((binPath ***).get, binOutputPath, log).left.toOption orElse - FileUtilities.zip((("dist" / distName) ##).get, "dist" / distZipName, true, log) - None - } - - val PackageDistDescription = "Creates a deployable zip file with dependencies, config, and scripts." - lazy val packageDist = packageDistTask dependsOn(`package`, `test`) describedAs PackageDistDescription - - val cleanDist = cleanTask("dist" ##) describedAs("Erase any packaged distributions.") - override def cleanAction = super.cleanAction dependsOn(cleanDist) - - override def javaCompileOptions = super.javaCompileOptions ++ - List(JavaCompileOption("-source"), JavaCompileOption("1.5")) - - override def packageAction = super.packageAction dependsOn (testCompileAction, packageTestAction) - - } - - class KafkaPerfProject(info: ProjectInfo) extends DefaultProject(info) - with IdeaProject - with CoreDependencies { - val perfPackageAction = packageAllAction - val dependsOnCore = core - - //The issue is going from log4j 1.2.14 to 1.2.15, the developers added some features which required - // some dependencies on various sun and javax packages. - override def ivyXML = - - - - - - - - - override def artifactID = "kafka-perf" - override def filterScalaJars = false - override def javaCompileOptions = super.javaCompileOptions ++ - List(JavaCompileOption("-Xlint:unchecked")) - } - - class KafkaExamplesProject(info: ProjectInfo) extends DefaultProject(info) - with IdeaProject - with CoreDependencies { - val examplesPackageAction = packageAllAction - val dependsOnCore = core - //The issue is going from log4j 1.2.14 to 1.2.15, the developers added some features which required - // some dependencies on various sun and javax packages. 
- override def ivyXML = - - - - - - - - - override def artifactID = "kafka-java-examples" - override def filterScalaJars = false - override def javaCompileOptions = super.javaCompileOptions ++ - List(JavaCompileOption("-Xlint:unchecked")) - } - - class ContribProject(info: ProjectInfo) extends ParentProject(info) with IdeaProject { - lazy val hadoopProducer = project("hadoop-producer", "hadoop producer", - new HadoopProducerProject(_), core) - lazy val hadoopConsumer = project("hadoop-consumer", "hadoop consumer", - new HadoopConsumerProject(_), core) - - val producerPackageAction = hadoopProducer.producerPackageAction - val consumerPackageAction = hadoopConsumer.consumerPackageAction - - class HadoopProducerProject(info: ProjectInfo) extends DefaultProject(info) - with IdeaProject - with CoreDependencies with HadoopDependencies { - val producerPackageAction = packageAllAction - override def ivyXML = - - - - - - - - - - - - - - - - } - - class HadoopConsumerProject(info: ProjectInfo) extends DefaultProject(info) - with IdeaProject - with CoreDependencies { - val consumerPackageAction = packageAllAction - override def ivyXML = - - - - - - - - - - - - - - - - - val jodaTime = "joda-time" % "joda-time" % "1.6" - } - } - - trait TestDependencies { - val easymock = "org.easymock" % "easymock" % "3.0" % "test" - val junit = "junit" % "junit" % "4.1" % "test" - val scalaTest = "org.scalatest" % "scalatest" % "1.2" % "test" - } - - trait CoreDependencies { - val log4j = "log4j" % "log4j" % "1.2.15" - val jopt = "net.sf.jopt-simple" % "jopt-simple" % "3.2" - val slf4jSimple = "org.slf4j" % "slf4j-simple" % "1.6.4" - } - - trait HadoopDependencies { - val avro = "org.apache.avro" % "avro" % "1.4.0" - val commonsLogging = "commons-logging" % "commons-logging" % "1.0.4" - val jacksonCore = "org.codehaus.jackson" % "jackson-core-asl" % "1.5.5" - val jacksonMapper = "org.codehaus.jackson" % "jackson-mapper-asl" % "1.5.5" - val hadoop = "org.apache.hadoop" % "hadoop-core" % "0.20.2" - } - - trait CompressionDependencies { - val snappy = "org.xerial.snappy" % "snappy-java" % "1.0.5" - } -} diff --git a/project/plugins.sbt b/project/plugins.sbt deleted file mode 100644 index 7abe703893ba0..0000000000000 --- a/project/plugins.sbt +++ /dev/null @@ -1,9 +0,0 @@ -resolvers += Resolver.url("artifactory", url("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases"))(Resolver.ivyStylePatterns) - -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.8.8") - -addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.2.0") - -resolvers += Resolver.url("sbt-plugin-releases", new URL("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases/"))(Resolver.ivyStylePatterns) - -addSbtPlugin("com.jsuereth" % "xsbt-gpg-plugin" % "0.6") \ No newline at end of file diff --git a/scala.gradle b/scala.gradle new file mode 100644 index 0000000000000..3c6f38ad035b3 --- /dev/null +++ b/scala.gradle @@ -0,0 +1,11 @@ +if (!hasProperty('scalaVersion')) { + ext.scalaVersion = '2.10.4' +} +ext.defaultScalaVersion = '2.10.4' +if (scalaVersion.startsWith('2.10')) { + ext.baseScalaVersion = '2.10' +} else if (scalaVersion.startsWith('2.11')) { + ext.baseScalaVersion = '2.11' +} else { + ext.baseScalaVersion = scalaVersion +} diff --git a/settings.gradle b/settings.gradle index 74e591a8a4afc..83f764e6a4a15 100644 --- a/settings.gradle +++ b/settings.gradle @@ -13,4 +13,5 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-include 'core', 'perf', 'contrib:hadoop-consumer', 'contrib:hadoop-producer', 'examples', 'clients' +apply from: file('scala.gradle') +include 'core', 'contrib:hadoop-consumer', 'contrib:hadoop-producer', 'examples', 'clients' diff --git a/system_test/README.txt b/system_test/README.txt index d89ad9d7d5f0c..0e469e373c9d9 100644 --- a/system_test/README.txt +++ b/system_test/README.txt @@ -48,15 +48,14 @@ The framework has the following levels: # ========================== * Please note that the following commands should be executed after downloading the kafka source code to build all the required binaries: - 1. / $ ./sbt update package + 1. / $ ./gradlew jar Now you are ready to follow the steps below. 1. Update system_test/cluster_config.json for "kafka_home" & "java_home" specific to your environment 2. Edit system_test/replication_testsuite/testcase_1/testcase_1_properties.json and update "broker-list" to the proper settings of your environment. (If this test is to be run in a single localhost, no change is required for this.) 3. To run the test, go to /system_test and run the following command: - $ python -B system_test_runner.py - 4. To turn on debugging, update system_test/system_test_runner.py and uncomment the following line: - namedLogger.setLevel(logging.DEBUG) + $ python -u -B system_test_runner.py 2>&1 | tee system_test_output.log + 4. To turn on debugging, update system_test/logging.conf by changing the level in handlers session from INFO to DEBUG. # ========================== # Adding Test Case @@ -76,4 +75,7 @@ To create a new test case under "replication_testsuite", please do the following 2. Rename system_test/replication_testsuite/testcase_2/testcase_1_properties.json => system_test/replication_testsuite/testcase_2/testcase_2_properties.json 3. Update system_test/replication_testsuite/testcase_2/testcase_2_properties.json with the corresponding settings for testcase 2. - +Note: +The following testcases are for the old producer and the old mirror maker. We can remove them once we phase out the old producer client. + replication_testsuite: testcase_{10101 - 10110} testcase_{10131 - 10134} + mirror_maker_testsuite: testcase_{15001 - 15006} diff --git a/system_test/broker_failure/bin/run-test.sh b/system_test/broker_failure/bin/run-test.sh index 1f11180e33974..549cd1f4eafd8 100755 --- a/system_test/broker_failure/bin/run-test.sh +++ b/system_test/broker_failure/bin/run-test.sh @@ -5,9 +5,9 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,7 +17,7 @@ # =========== # run-test.sh # =========== - + # ==================================== # Do not change the followings # (keep this section at the beginning @@ -52,9 +52,9 @@ readonly source_console_consumer_grp=source readonly target_console_consumer_grp=target readonly message_size=100 readonly console_consumer_timeout_ms=15000 -readonly num_kafka_source_server=4 # requires same no. of property files such as: +readonly num_kafka_source_server=4 # requires same no. of property files such as: # $base_dir/config/server_source{1..4}.properties -readonly num_kafka_target_server=3 # requires same no. 
of property files such as: +readonly num_kafka_target_server=3 # requires same no. of property files such as: # $base_dir/config/server_target{1..3}.properties readonly num_kafka_mirror_maker=3 # any values greater than 0 readonly wait_time_after_killing_broker=0 # wait after broker is stopped but before starting again @@ -65,8 +65,8 @@ readonly wait_time_after_restarting_broker=10 # ==================================== num_msg_per_batch=500 # no. of msg produced in each calling of ProducerPerformance num_producer_threads=5 # no. of producer threads to send msg -producer_sleep_min=5 # min & max sleep time (in sec) between each -producer_sleep_max=5 # batch of messages sent from producer +producer_sleep_min=5 # min & max sleep time (in sec) between each +producer_sleep_max=5 # batch of messages sent from producer # ==================================== # zookeeper @@ -255,7 +255,7 @@ create_topic() { --topic $this_topic_to_create \ --zookeeper $this_zk_conn_str \ --replica $this_replica_factor \ - 2> $kafka_topic_creation_log_file + 2> $kafka_topic_creation_log_file } # ========================================= @@ -281,7 +281,7 @@ start_zk() { start_source_servers_cluster() { info "starting source cluster" - for ((i=1; i<=$num_kafka_source_server; i++)) + for ((i=1; i<=$num_kafka_source_server; i++)) do start_source_server $i done @@ -367,13 +367,13 @@ start_console_consumer() { info "starting console consumers for $this_consumer_grp" - $base_dir/bin/kafka-run-class.sh kafka.consumer.ConsoleConsumer \ + $base_dir/bin/kafka-run-class.sh kafka.tools.ConsoleConsumer \ --zookeeper localhost:$this_consumer_zk_port \ --topic $test_topic \ --group $this_consumer_grp \ --from-beginning \ --consumer-timeout-ms $console_consumer_timeout_ms \ - --formatter "kafka.consumer.ConsoleConsumer\$${this_msg_formatter}" \ + --formatter "kafka.tools.ConsoleConsumer\$${this_msg_formatter}" \ 2>&1 > ${this_consumer_log} & console_consumer_pid=$! 
@@ -448,7 +448,7 @@ start_background_producer() { info "producing $num_msg_per_batch messages on topic '$topic'" $base_dir/bin/kafka-run-class.sh \ - kafka.perf.ProducerPerformance \ + kafka.tools.ProducerPerformance \ --brokerinfo zk.connect=localhost:2181 \ --topics $topic \ --messages $num_msg_per_batch \ @@ -499,7 +499,7 @@ cmp_checksum() { crc_only_in_producer=`comm -23 $producer_performance_mid_sorted_uniq_log $console_consumer_source_mid_sorted_uniq_log` - duplicate_mirror_mid=`comm -23 $console_consumer_target_mid_sorted_log $console_consumer_target_mid_sorted_uniq_log` + duplicate_mirror_mid=`comm -23 $console_consumer_target_mid_sorted_log $console_consumer_target_mid_sorted_uniq_log` no_of_duplicate_msg=$(( $msg_count_from_mirror_consumer - $uniq_msg_count_from_mirror_consumer \ + $msg_count_from_source_consumer - $uniq_msg_count_from_source_consumer - \ 2*$duplicate_msg_in_producer )) @@ -521,19 +521,19 @@ cmp_checksum() { echo "" echo "========================================================" >> $checksum_diff_log - echo "crc only in producer" >> $checksum_diff_log + echo "crc only in producer" >> $checksum_diff_log echo "========================================================" >> $checksum_diff_log - echo "${crc_only_in_producer}" >> $checksum_diff_log + echo "${crc_only_in_producer}" >> $checksum_diff_log echo "" >> $checksum_diff_log echo "========================================================" >> $checksum_diff_log - echo "crc only in source consumer" >> $checksum_diff_log + echo "crc only in source consumer" >> $checksum_diff_log echo "========================================================" >> $checksum_diff_log - echo "${crc_only_in_source_consumer}" >> $checksum_diff_log + echo "${crc_only_in_source_consumer}" >> $checksum_diff_log echo "" >> $checksum_diff_log echo "========================================================" >> $checksum_diff_log echo "crc only in mirror consumer" >> $checksum_diff_log echo "========================================================" >> $checksum_diff_log - echo "${crc_only_in_mirror_consumer}" >> $checksum_diff_log + echo "${crc_only_in_mirror_consumer}" >> $checksum_diff_log echo "" >> $checksum_diff_log echo "========================================================" >> $checksum_diff_log echo "duplicate crc in mirror consumer" >> $checksum_diff_log @@ -583,8 +583,8 @@ start_test() { info "Started background producer pid [${background_producer_pid}]" sleep 5 - - # loop for no. of iterations specified in $num_iterations + + # loop for no. 
of iterations specified in $num_iterations while [ $num_iterations -ge $iter ] do # if $svr_to_bounce is '0', it means no bouncing diff --git a/system_test/metrics.json b/system_test/metrics.json index cd3fc142176b8..30dabe596871f 100644 --- a/system_test/metrics.json +++ b/system_test/metrics.json @@ -78,13 +78,13 @@ { "graph_name": "ProducePurgatoryQueueSize", "y_label": "size", - "bean_name": "kafka.server:type=ProducerRequestPurgatory,name=NumDelayedRequests", + "bean_name": "kafka.server:type=ProducerRequestPurgatory,name=NumDelayedOperations", "attributes": "Value" }, { "graph_name": "FetchPurgatoryQueueSize", "y_label": "size", - "bean_name": "kafka.server:type=FetchRequestPurgatory,name=NumDelayedRequests", + "bean_name": "kafka.server:type=FetchRequestPurgatory,name=NumDelayedOperations", "attributes": "Value" }, { diff --git a/system_test/migration_tool_testsuite/0.7/bin/kafka-run-class.sh b/system_test/migration_tool_testsuite/0.7/bin/kafka-run-class.sh deleted file mode 100755 index ec92a343ebfee..0000000000000 --- a/system_test/migration_tool_testsuite/0.7/bin/kafka-run-class.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -if [ $# -lt 1 ]; -then - echo "USAGE: $0 classname [opts]" - exit 1 -fi - -snappy=~/.ivy2/cache/org.xerial.snappy/snappy-java/bundles/snappy-java-1.0.5.jar -CLASSPATH=$CLASSPATH:$snappy -library=~/.ivy2/cache/org.scala-lang/scala-library/jars/scala-library-2.8.0.jar -CLASSPATH=$CLASSPATH:$library -compiler=~/.ivy2/cache/org.scala-lang/scala-compiler/jars/scala-compiler-2.8.0.jar -CLASSPATH=$CLASSPATH:$compiler -log4j=~/.ivy2/cache/log4j/log4j/jars/log4j-1.2.15.jar -CLASSPATH=$CLASSPATH:$log4j -slf=~/.ivy2/cache/org.slf4j/slf4j-api/jars/slf4j-api-1.6.4.jar -CLASSPATH=$CLASSPATH:$slf -zookeeper=~/.ivy2/cache/org.apache.zookeeper/zookeeper/jars/zookeeper-3.3.4.jar -CLASSPATH=$CLASSPATH:$zookeeper -jopt=~/.ivy2//cache/net.sf.jopt-simple/jopt-simple/jars/jopt-simple-3.2.jar -CLASSPATH=$CLASSPATH:$jopt - -base_dir=$(dirname $0)/../../../.. 
-kafka_07_lib_dir=$(dirname $0)/../lib - -# 0.8 - scala jars -for file in $base_dir/project/boot/scala-2.8.0/lib/*.jar; -do - CLASSPATH=$CLASSPATH:$file -done - -# 0.7 - kafka-0.7.jar, zkclient-0.1.jar, kafka-perf-0.7.0.jar -for file in ${kafka_07_lib_dir}/*.jar; -do - CLASSPATH=$CLASSPATH:$file -done - -# 0.8 - metrics jar -for file in $base_dir/core/lib/metrics*.jar; -do - CLASSPATH=$CLASSPATH:$file -done - -# 0.8 - misc jars -for file in $base_dir/core/lib_managed/scala_2.8.0/compile/*.jar; -do - if [ ${file##*/} != "sbt-launch.jar" ]; then - CLASSPATH=$CLASSPATH:$file - fi -done -if [ -z "$KAFKA_JMX_OPTS" ]; then - KAFKA_JMX_OPTS="-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false " -fi - -# Log4j settings -if [ -z "$KAFKA_LOG4J_OPTS" ]; then - KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:$base_dir/config/log4j.properties" -fi - -if [ -z "$KAFKA_OPTS" ]; then - KAFKA_OPTS="-Xmx512M -server $KAFKA_LOG4J_OPTS" -fi -if [ $JMX_PORT ]; then - KAFKA_JMX_OPTS="$KAFKA_JMX_OPTS -Dcom.sun.management.jmxremote.port=$JMX_PORT " -fi -if [ -z "$JAVA_HOME" ]; then - JAVA="java" -else - JAVA="$JAVA_HOME/bin/java" -fi - -$JAVA $KAFKA_OPTS $KAFKA_JMX_OPTS -cp $CLASSPATH $@ diff --git a/system_test/migration_tool_testsuite/0.7/config/test-log4j.properties b/system_test/migration_tool_testsuite/0.7/config/test-log4j.properties deleted file mode 100644 index a3ae33f20e4b7..0000000000000 --- a/system_test/migration_tool_testsuite/0.7/config/test-log4j.properties +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-log4j.rootLogger=INFO, stdout - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.kafkaAppender=org.apache.log4j.DailyRollingFileAppender -log4j.appender.kafkaAppender.DatePattern='.'yyyy-MM-dd-HH -log4j.appender.kafkaAppender.File=logs/server.log -log4j.appender.kafkaAppender.layout=org.apache.log4j.PatternLayout -log4j.appender.kafkaAppender.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.stateChangeAppender=org.apache.log4j.DailyRollingFileAppender -log4j.appender.stateChangeAppender.DatePattern='.'yyyy-MM-dd-HH -log4j.appender.stateChangeAppender.File=logs/state-change.log -log4j.appender.stateChangeAppender.layout=org.apache.log4j.PatternLayout -log4j.appender.stateChangeAppender.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.requestAppender=org.apache.log4j.DailyRollingFileAppender -log4j.appender.requestAppender.DatePattern='.'yyyy-MM-dd-HH -log4j.appender.requestAppender.File=logs/kafka-request.log -log4j.appender.requestAppender.layout=org.apache.log4j.PatternLayout -log4j.appender.requestAppender.layout.ConversionPattern=[%d] %p %m (%c)%n - -log4j.appender.controllerAppender=org.apache.log4j.DailyRollingFileAppender -log4j.appender.controllerAppender.DatePattern='.'yyyy-MM-dd-HH -log4j.appender.controllerAppender.File=logs/controller.log -log4j.appender.controllerAppender.layout=org.apache.log4j.PatternLayout -log4j.appender.controllerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n - -# Turn on all our debugging info -#log4j.logger.kafka.producer.async.DefaultEventHandler=DEBUG, kafkaAppender -#log4j.logger.kafka.client.ClientUtils=DEBUG, kafkaAppender -log4j.logger.kafka.perf=DEBUG, kafkaAppender -log4j.logger.kafka.perf.ProducerPerformance$ProducerThread=DEBUG, kafkaAppender -#log4j.logger.org.I0Itec.zkclient.ZkClient=DEBUG -log4j.logger.kafka=INFO, kafkaAppender - -log4j.logger.kafka.network.RequestChannel$=TRACE, requestAppender -log4j.additivity.kafka.network.RequestChannel$=false - -#log4j.logger.kafka.network.Processor=TRACE, requestAppender -#log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender -#log4j.additivity.kafka.server.KafkaApis=false -log4j.logger.kafka.request.logger=TRACE, requestAppender -log4j.additivity.kafka.request.logger=false - -log4j.logger.kafka.controller=TRACE, controllerAppender -log4j.additivity.kafka.controller=false - -log4j.logger.state.change.logger=TRACE, stateChangeAppender -log4j.additivity.state.change.logger=false - - diff --git a/system_test/migration_tool_testsuite/0.7/lib/kafka-0.7.0.jar b/system_test/migration_tool_testsuite/0.7/lib/kafka-0.7.0.jar deleted file mode 100644 index 982ddb953aaa1..0000000000000 Binary files a/system_test/migration_tool_testsuite/0.7/lib/kafka-0.7.0.jar and /dev/null differ diff --git a/system_test/migration_tool_testsuite/0.7/lib/kafka-perf-0.7.0.jar b/system_test/migration_tool_testsuite/0.7/lib/kafka-perf-0.7.0.jar deleted file mode 100644 index d4f89d54907af..0000000000000 Binary files a/system_test/migration_tool_testsuite/0.7/lib/kafka-perf-0.7.0.jar and /dev/null differ diff --git a/system_test/migration_tool_testsuite/0.7/lib/zkclient-0.1.jar b/system_test/migration_tool_testsuite/0.7/lib/zkclient-0.1.jar deleted file mode 100644 index aebcc3491adb3..0000000000000 Binary files a/system_test/migration_tool_testsuite/0.7/lib/zkclient-0.1.jar and /dev/null differ diff --git 
a/system_test/migration_tool_testsuite/__init__.py b/system_test/migration_tool_testsuite/__init__.py deleted file mode 100644 index 8d1c8b69c3fce..0000000000000 --- a/system_test/migration_tool_testsuite/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/system_test/migration_tool_testsuite/config/migration_consumer.properties b/system_test/migration_tool_testsuite/config/migration_consumer.properties deleted file mode 100644 index 184f1dea16475..0000000000000 --- a/system_test/migration_tool_testsuite/config/migration_consumer.properties +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.consumer.ConsumerConfig for more details - -# zk connection string -# comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -zk.connect=127.0.0.1:2188 - -# timeout in ms for connecting to zookeeper -zk.connectiontimeout.ms=1000000 - -#consumer group id -groupid=test-consumer-group - -#consumer timeout -#consumer.timeout.ms=5000 diff --git a/system_test/migration_tool_testsuite/config/migration_producer.properties b/system_test/migration_tool_testsuite/config/migration_producer.properties deleted file mode 100644 index 17b5928a1213f..0000000000000 --- a/system_test/migration_tool_testsuite/config/migration_producer.properties +++ /dev/null @@ -1,66 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.producer.ProducerConfig for more details - -############################# Producer Basics ############################# - -# need to set either broker.list or zk.connect - -# configure brokers statically -# format: host1:port1,host2:port2 ... 
-metadata.broker.list=localhost:9094,localhost:9095,localhost:9096 - -# discover brokers from ZK -#zk.connect= - -# zookeeper session timeout; default is 6000 -#zk.session.timeout.ms= - -# the max time that the client waits to establish a connection to zookeeper; default is 6000 -#zk.connection.timeout.ms - -# name of the partitioner class for partitioning events; default partition spreads data randomly -#partitioner.class= - -# specifies whether the messages are sent asynchronously (async) or synchronously (sync) -producer.type=sync - -# specify the compression codec for all data generated: 0: no compression, 1: gzip -compression.codec=0 - -# message encoder -serializer.class=kafka.serializer.DefaultEncoder - -# allow topic level compression -#compressed.topics= - -############################# Async Producer ############################# -# maximum time, in milliseconds, for buffering data on the producer queue -#queue.buffering.max.ms= - -# the maximum size of the blocking queue for buffering on the producer -#queue.buffering.max.messages= - -# Timeout for event enqueue: -# 0: events will be enqueued immediately or dropped if the queue is full -# -ve: enqueue will block indefinitely if the queue is full -# +ve: enqueue will block up to this many milliseconds if the queue is full -#queue.enqueue.timeout.ms= - -# the number of messages batched at the producer -#batch.num.messages= - -message.send.max.retries=3 -request.required.acks=1 diff --git a/system_test/migration_tool_testsuite/testcase_9001/testcase_9001_properties.json b/system_test/migration_tool_testsuite/testcase_9001/testcase_9001_properties.json deleted file mode 100644 index 608e3bdf79cc4..0000000000000 --- a/system_test/migration_tool_testsuite/testcase_9001/testcase_9001_properties.json +++ /dev/null @@ -1,125 +0,0 @@ -{ - "description": {"01":"To Test : 'Replication with Migration Tool'", - "02":"Set up 2 clusters such as : SOURCE => Migration Tool => TARGET", - "03":"Produce and consume messages to a single topic - single partition.", - "04":"This test sends messages to 3 replicas", - "05":"At the end it verifies the log size and contents", - "06":"Use a consumer to verify no message loss in TARGET cluster.", - "07":"Producer dimensions : mode:sync, acks:-1, comp:0", - "08":"Log segment size : 51200" - }, - "testcase_args": { - "bounce_migration_tool": "false", - "replica_factor": "3", - "num_partition": "1", - "num_iteration": "1", - "sleep_seconds_between_producer_calls": "1", - "message_producing_free_time_sec": "30", - "num_messages_to_produce_per_producer_call": "50" - }, - "entities": [ - { - "entity_id": "0", - "clientPort": "2188", - "dataDir": "/tmp/zookeeper_0", - "log_filename": "zookeeper_0.log", - "config_filename": "zookeeper_0.properties" - }, - { - "entity_id": "1", - "port": "9091", - "brokerid": "1", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_1_logs", - "log_filename": "kafka_server_1.log", - "config_filename": "kafka_server_1.properties" - }, - { - "entity_id": "2", - "port": "9092", - "brokerid": "2", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_2_logs", - "log_filename": "kafka_server_2.log", - "config_filename": "kafka_server_2.properties" - }, - { - "entity_id": "3", - "port": "9093", - "brokerid": "3", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_3_logs", - "log_filename": "kafka_server_3.log", - "config_filename": "kafka_server_3.properties" - }, - { - "entity_id": "4", - "port": "9094", - 
"broker.id": "4", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_4_logs", - "log_filename": "kafka_server_4.log", - "config_filename": "kafka_server_4.properties" - }, - { - "entity_id": "5", - "port": "9095", - "broker.id": "5", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_5_logs", - "log_filename": "kafka_server_5.log", - "config_filename": "kafka_server_5.properties" - }, - { - "entity_id": "6", - "port": "9096", - "broker.id": "6", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_6_logs", - "log_filename": "kafka_server_6.log", - "config_filename": "kafka_server_6.properties" - }, - { - "entity_id": "7", - "topic": "test_1", - "threads": "5", - "compression-codec": "0", - "message-size": "500", - "message": "500", - "request-num-acks": "-1", - "async": "false", - "log_filename": "producer_performance_7.log", - "config_filename": "producer_performance_7.properties" - }, - { - "entity_id": "8", - "topic": "test_1", - "group.id": "mytestgroup", - "consumer-timeout-ms": "10000", - "log_filename": "console_consumer_8.log", - "config_filename": "console_consumer_8.properties" - }, - { - "entity_id": "9", - "clientPort": "2191", - "dataDir": "/tmp/zookeeper_9", - "log_filename": "zookeeper_9.log", - "config_filename": "zookeeper_9.properties" - }, - { - "entity_id": "10", - "whitelist": ".*", - "num.producers": "2", - "num.streams": "2", - "producer.config": "migration_tool_testsuite/config/migration_producer.properties", - "consumer.config": "migration_tool_testsuite/config/migration_consumer.properties", - "zkclient.01.jar": "migration_tool_testsuite/0.7/lib/zkclient-0.1.jar", - "kafka.07.jar" : "migration_tool_testsuite/0.7/lib/kafka-0.7.0.jar", - "log_filename": "migration_tool.log", - "config_filename": "migration_tool.properties" - } - ] -} diff --git a/system_test/migration_tool_testsuite/testcase_9003/testcase_9003_properties.json b/system_test/migration_tool_testsuite/testcase_9003/testcase_9003_properties.json deleted file mode 100644 index 333256c8f53a6..0000000000000 --- a/system_test/migration_tool_testsuite/testcase_9003/testcase_9003_properties.json +++ /dev/null @@ -1,138 +0,0 @@ -{ - "description": {"01":"To Test : 'Replication with Migration Tool'", - "02":"Set up 2 clusters such as : SOURCE => Migration Tool => TARGET", - "03":"Produce and consume messages to a single topic - single partition.", - "04":"This test sends messages to 3 replicas", - "05":"At the end it verifies the log size and contents", - "06":"Use a consumer to verify no message loss in TARGET cluster.", - "07":"Producer dimensions : mode:async, acks:-1, comp:1", - "08":"Log segment size : 51200" - }, - "testcase_args": { - "bounce_migration_tool": "true", - "bounced_entity_downtime_sec": "30", - "replica_factor": "3", - "num_partition": "1", - "num_iteration": "1", - "sleep_seconds_between_producer_calls": "1", - "message_producing_free_time_sec": "30", - "num_messages_to_produce_per_producer_call": "50" - }, - "entities": [ - { - "entity_id": "0", - "clientPort": "2188", - "dataDir": "/tmp/zookeeper_0", - "log_filename": "zookeeper_0.log", - "config_filename": "zookeeper_0.properties" - }, - { - "entity_id": "1", - "port": "9091", - "brokerid": "1", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_1_logs", - "log_filename": "kafka_server_1.log", - "config_filename": "kafka_server_1.properties" - }, - { - "entity_id": "2", - "port": "9092", - "brokerid": "2", - "version": "0.7", - "log.file.size": "51200", - "log.dir": 
"/tmp/kafka_server_2_logs", - "log_filename": "kafka_server_2.log", - "config_filename": "kafka_server_2.properties" - }, - { - "entity_id": "3", - "port": "9093", - "brokerid": "3", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_3_logs", - "log_filename": "kafka_server_3.log", - "config_filename": "kafka_server_3.properties" - }, - { - "entity_id": "4", - "port": "9094", - "broker.id": "4", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_4_logs", - "log_filename": "kafka_server_4.log", - "config_filename": "kafka_server_4.properties" - }, - { - "entity_id": "5", - "port": "9095", - "broker.id": "5", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_5_logs", - "log_filename": "kafka_server_5.log", - "config_filename": "kafka_server_5.properties" - }, - { - "entity_id": "6", - "port": "9096", - "broker.id": "6", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_6_logs", - "log_filename": "kafka_server_6.log", - "config_filename": "kafka_server_6.properties" - }, - { - "entity_id": "7", - "topic": "test_1", - "threads": "5", - "compression-codec": "1", - "message-size": "500", - "message": "500", - "request-num-acks": "-1", - "async": "true", - "log_filename": "producer_performance_7.log", - "config_filename": "producer_performance_7.properties" - }, - { - "entity_id": "8", - "topic": "test_1", - "group.id": "mytestgroup", - "consumer-timeout-ms": "10000", - "log_filename": "console_consumer_8.log", - "config_filename": "console_consumer_8.properties" - }, - { - "entity_id": "9", - "clientPort": "2191", - "dataDir": "/tmp/zookeeper_9", - "log_filename": "zookeeper_9.log", - "config_filename": "zookeeper_9.properties" - }, - { - "entity_id": "10", - "whitelist": ".*", - "num.producers": "2", - "num.streams": "2", - "producer.config": "migration_tool_testsuite/config/migration_producer.properties", - "consumer.config": "migration_tool_testsuite/config/migration_consumer.properties", - "zkclient.01.jar": "migration_tool_testsuite/0.7/lib/zkclient-0.1.jar", - "kafka.07.jar" : "migration_tool_testsuite/0.7/lib/kafka-0.7.0.jar", - "log_filename": "migration_tool_10.log", - "config_filename": "migration_tool_10.properties" - }, - { - "entity_id": "11", - "whitelist": ".*", - "num.producers": "2", - "num.streams": "2", - "producer.config": "migration_tool_testsuite/config/migration_producer.properties", - "consumer.config": "migration_tool_testsuite/config/migration_consumer.properties", - "zkclient.01.jar": "migration_tool_testsuite/0.7/lib/zkclient-0.1.jar", - "kafka.07.jar" : "migration_tool_testsuite/0.7/lib/kafka-0.7.0.jar", - "log_filename": "migration_tool_11.log", - "config_filename": "migration_tool_11.properties" - } - ] -} diff --git a/system_test/migration_tool_testsuite/testcase_9004/testcase_9004_properties.json b/system_test/migration_tool_testsuite/testcase_9004/testcase_9004_properties.json deleted file mode 100644 index b2a6e85e575b4..0000000000000 --- a/system_test/migration_tool_testsuite/testcase_9004/testcase_9004_properties.json +++ /dev/null @@ -1,138 +0,0 @@ -{ - "description": {"01":"To Test : 'Replication with Migration Tool'", - "02":"Set up 2 clusters such as : SOURCE => Migration Tool => TARGET", - "03":"Produce and consume messages to a single topic - single partition.", - "04":"This test sends messages to 3 replicas", - "05":"At the end it verifies the log size and contents", - "06":"Use a consumer to verify no message loss in TARGET cluster.", - "07":"Producer dimensions : mode:async, 
acks:1, comp:1", - "08":"Log segment size : 51200" - }, - "testcase_args": { - "bounce_migration_tool": "true", - "bounced_entity_downtime_sec": "30", - "replica_factor": "3", - "num_partition": "1", - "num_iteration": "1", - "sleep_seconds_between_producer_calls": "1", - "message_producing_free_time_sec": "30", - "num_messages_to_produce_per_producer_call": "50" - }, - "entities": [ - { - "entity_id": "0", - "clientPort": "2188", - "dataDir": "/tmp/zookeeper_0", - "log_filename": "zookeeper_0.log", - "config_filename": "zookeeper_0.properties" - }, - { - "entity_id": "1", - "port": "9091", - "brokerid": "1", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_1_logs", - "log_filename": "kafka_server_1.log", - "config_filename": "kafka_server_1.properties" - }, - { - "entity_id": "2", - "port": "9092", - "brokerid": "2", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_2_logs", - "log_filename": "kafka_server_2.log", - "config_filename": "kafka_server_2.properties" - }, - { - "entity_id": "3", - "port": "9093", - "brokerid": "3", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_3_logs", - "log_filename": "kafka_server_3.log", - "config_filename": "kafka_server_3.properties" - }, - { - "entity_id": "4", - "port": "9094", - "broker.id": "4", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_4_logs", - "log_filename": "kafka_server_4.log", - "config_filename": "kafka_server_4.properties" - }, - { - "entity_id": "5", - "port": "9095", - "broker.id": "5", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_5_logs", - "log_filename": "kafka_server_5.log", - "config_filename": "kafka_server_5.properties" - }, - { - "entity_id": "6", - "port": "9096", - "broker.id": "6", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_6_logs", - "log_filename": "kafka_server_6.log", - "config_filename": "kafka_server_6.properties" - }, - { - "entity_id": "7", - "topic": "test_1", - "threads": "5", - "compression-codec": "1", - "message-size": "500", - "message": "500", - "request-num-acks": "1", - "async": "true", - "log_filename": "producer_performance_7.log", - "config_filename": "producer_performance_7.properties" - }, - { - "entity_id": "8", - "topic": "test_1", - "group.id": "mytestgroup", - "consumer-timeout-ms": "10000", - "log_filename": "console_consumer_8.log", - "config_filename": "console_consumer_8.properties" - }, - { - "entity_id": "9", - "clientPort": "2191", - "dataDir": "/tmp/zookeeper_9", - "log_filename": "zookeeper_9.log", - "config_filename": "zookeeper_9.properties" - }, - { - "entity_id": "10", - "whitelist": ".*", - "num.producers": "2", - "num.streams": "2", - "producer.config": "migration_tool_testsuite/config/migration_producer.properties", - "consumer.config": "migration_tool_testsuite/config/migration_consumer.properties", - "zkclient.01.jar": "migration_tool_testsuite/0.7/lib/zkclient-0.1.jar", - "kafka.07.jar" : "migration_tool_testsuite/0.7/lib/kafka-0.7.0.jar", - "log_filename": "migration_tool_10.log", - "config_filename": "migration_tool_10.properties" - }, - { - "entity_id": "11", - "whitelist": ".*", - "num.producers": "2", - "num.streams": "2", - "producer.config": "migration_tool_testsuite/config/migration_producer.properties", - "consumer.config": "migration_tool_testsuite/config/migration_consumer.properties", - "zkclient.01.jar": "migration_tool_testsuite/0.7/lib/zkclient-0.1.jar", - "kafka.07.jar" : 
"migration_tool_testsuite/0.7/lib/kafka-0.7.0.jar", - "log_filename": "migration_tool_11.log", - "config_filename": "migration_tool_11.properties" - } - ] -} diff --git a/system_test/migration_tool_testsuite/testcase_9005/testcase_9005_properties.json b/system_test/migration_tool_testsuite/testcase_9005/testcase_9005_properties.json deleted file mode 100644 index ddbc90506902c..0000000000000 --- a/system_test/migration_tool_testsuite/testcase_9005/testcase_9005_properties.json +++ /dev/null @@ -1,168 +0,0 @@ -{ - "description": {"01":"To Test : 'Replication with Migration Tool'", - "02":"Set up 2 clusters such as : SOURCE => Migration Tool => TARGET", - "03":"Produce and consume messages to 2 topics - 2 partitions.", - "04":"This test sends messages to 3 replicas", - "05":"At the end it verifies the log size and contents", - "06":"Use a consumer to verify no message loss in TARGET cluster.", - "07":"Producer dimensions : mode:async, acks:-1, comp:1", - "08":"Log segment size : 51200" - }, - "testcase_args": { - "bounce_migration_tool": "true", - "bounced_entity_downtime_sec": "30", - "replica_factor": "3", - "num_partition": "2", - "num_iteration": "1", - "sleep_seconds_between_producer_calls": "1", - "message_producing_free_time_sec": "30", - "num_messages_to_produce_per_producer_call": "50" - }, - "entities": [ - { - "entity_id": "0", - "clientPort": "2188", - "dataDir": "/tmp/zookeeper_0", - "log_filename": "zookeeper_0.log", - "config_filename": "zookeeper_0.properties" - }, - { - "entity_id": "1", - "clientPort": "2191", - "dataDir": "/tmp/zookeeper_1", - "log_filename": "zookeeper_1.log", - "config_filename": "zookeeper_1.properties" - }, - - - { - "entity_id": "2", - "port": "9091", - "brokerid": "1", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_2_logs", - "log_filename": "kafka_server_2.log", - "config_filename": "kafka_server_2.properties" - }, - { - "entity_id": "3", - "port": "9092", - "brokerid": "2", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_3_logs", - "log_filename": "kafka_server_3.log", - "config_filename": "kafka_server_3.properties" - }, - { - "entity_id": "4", - "port": "9093", - "brokerid": "3", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_4_logs", - "log_filename": "kafka_server_4.log", - "config_filename": "kafka_server_4.properties" - }, - - - { - "entity_id": "5", - "port": "9094", - "broker.id": "4", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_5_logs", - "log_filename": "kafka_server_5.log", - "config_filename": "kafka_server_5.properties" - }, - { - "entity_id": "6", - "port": "9095", - "broker.id": "5", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_6_logs", - "log_filename": "kafka_server_6.log", - "config_filename": "kafka_server_6.properties" - }, - { - "entity_id": "7", - "port": "9096", - "broker.id": "6", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_7_logs", - "log_filename": "kafka_server_7.log", - "config_filename": "kafka_server_7.properties" - }, - - - { - "entity_id": "8", - "topic": "test_1", - "threads": "5", - "compression-codec": "1", - "message-size": "500", - "message": "500", - "request-num-acks": "-1", - "async": "true", - "log_filename": "producer_performance_8.log", - "config_filename": "producer_performance_8.properties" - }, - { - "entity_id": "9", - "topic": "test_2", - "threads": "5", - "compression-codec": "1", - "message-size": "500", - "message": "500", - "request-num-acks": 
"-1", - "async": "true", - "log_filename": "producer_performance_9.log", - "config_filename": "producer_performance_9.properties" - }, - - - { - "entity_id": "10", - "topic": "test_1", - "group.id": "mytestgroup", - "consumer-timeout-ms": "10000", - "log_filename": "console_consumer_10.log", - "config_filename": "console_consumer_10.properties" - }, - { - "entity_id": "11", - "topic": "test_2", - "group.id": "mytestgroup", - "consumer-timeout-ms": "10000", - "log_filename": "console_consumer_11.log", - "config_filename": "console_consumer_11.properties" - }, - - - { - "entity_id": "12", - "whitelist": ".*", - "num.producers": "2", - "num.streams": "2", - "producer.config": "migration_tool_testsuite/config/migration_producer.properties", - "consumer.config": "migration_tool_testsuite/config/migration_consumer.properties", - "zkclient.01.jar": "migration_tool_testsuite/0.7/lib/zkclient-0.1.jar", - "kafka.07.jar" : "migration_tool_testsuite/0.7/lib/kafka-0.7.0.jar", - "log_filename": "migration_tool_12.log", - "config_filename": "migration_tool_12.properties" - }, - { - "entity_id": "13", - "whitelist": ".*", - "num.producers": "2", - "num.streams": "2", - "producer.config": "migration_tool_testsuite/config/migration_producer.properties", - "consumer.config": "migration_tool_testsuite/config/migration_consumer.properties", - "zkclient.01.jar": "migration_tool_testsuite/0.7/lib/zkclient-0.1.jar", - "kafka.07.jar" : "migration_tool_testsuite/0.7/lib/kafka-0.7.0.jar", - "log_filename": "migration_tool_13.log", - "config_filename": "migration_tool_13.properties" - } - ] -} diff --git a/system_test/migration_tool_testsuite/testcase_9006/testcase_9006_properties.json b/system_test/migration_tool_testsuite/testcase_9006/testcase_9006_properties.json deleted file mode 100644 index 21b4c40e207ac..0000000000000 --- a/system_test/migration_tool_testsuite/testcase_9006/testcase_9006_properties.json +++ /dev/null @@ -1,168 +0,0 @@ -{ - "description": {"01":"To Test : 'Replication with Migration Tool'", - "02":"Set up 2 clusters such as : SOURCE => Migration Tool => TARGET", - "03":"Produce and consume messages to 2 topics - 2 partitions.", - "04":"This test sends messages to 3 replicas", - "05":"At the end it verifies the log size and contents", - "06":"Use a consumer to verify no message loss in TARGET cluster.", - "07":"Producer dimensions : mode:async, acks:1, comp:1", - "08":"Log segment size : 51200" - }, - "testcase_args": { - "bounce_migration_tool": "true", - "bounced_entity_downtime_sec": "30", - "replica_factor": "3", - "num_partition": "2", - "num_iteration": "1", - "sleep_seconds_between_producer_calls": "1", - "message_producing_free_time_sec": "30", - "num_messages_to_produce_per_producer_call": "50" - }, - "entities": [ - { - "entity_id": "0", - "clientPort": "2188", - "dataDir": "/tmp/zookeeper_0", - "log_filename": "zookeeper_0.log", - "config_filename": "zookeeper_0.properties" - }, - { - "entity_id": "1", - "clientPort": "2191", - "dataDir": "/tmp/zookeeper_1", - "log_filename": "zookeeper_1.log", - "config_filename": "zookeeper_1.properties" - }, - - - { - "entity_id": "2", - "port": "9091", - "brokerid": "1", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_2_logs", - "log_filename": "kafka_server_2.log", - "config_filename": "kafka_server_2.properties" - }, - { - "entity_id": "3", - "port": "9092", - "brokerid": "2", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_3_logs", - "log_filename": "kafka_server_3.log", - 
"config_filename": "kafka_server_3.properties" - }, - { - "entity_id": "4", - "port": "9093", - "brokerid": "3", - "version": "0.7", - "log.file.size": "51200", - "log.dir": "/tmp/kafka_server_4_logs", - "log_filename": "kafka_server_4.log", - "config_filename": "kafka_server_4.properties" - }, - - - { - "entity_id": "5", - "port": "9094", - "broker.id": "4", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_5_logs", - "log_filename": "kafka_server_5.log", - "config_filename": "kafka_server_5.properties" - }, - { - "entity_id": "6", - "port": "9095", - "broker.id": "5", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_6_logs", - "log_filename": "kafka_server_6.log", - "config_filename": "kafka_server_6.properties" - }, - { - "entity_id": "7", - "port": "9096", - "broker.id": "6", - "log.segment.bytes": "51200", - "log.dir": "/tmp/kafka_server_7_logs", - "log_filename": "kafka_server_7.log", - "config_filename": "kafka_server_7.properties" - }, - - - { - "entity_id": "8", - "topic": "test_1", - "threads": "5", - "compression-codec": "1", - "message-size": "500", - "message": "500", - "request-num-acks": "1", - "async": "true", - "log_filename": "producer_performance_8.log", - "config_filename": "producer_performance_8.properties" - }, - { - "entity_id": "9", - "topic": "test_2", - "threads": "5", - "compression-codec": "1", - "message-size": "500", - "message": "500", - "request-num-acks": "1", - "async": "true", - "log_filename": "producer_performance_9.log", - "config_filename": "producer_performance_9.properties" - }, - - - { - "entity_id": "10", - "topic": "test_1", - "group.id": "mytestgroup", - "consumer-timeout-ms": "10000", - "log_filename": "console_consumer_10.log", - "config_filename": "console_consumer_10.properties" - }, - { - "entity_id": "11", - "topic": "test_2", - "group.id": "mytestgroup", - "consumer-timeout-ms": "10000", - "log_filename": "console_consumer_11.log", - "config_filename": "console_consumer_11.properties" - }, - - - { - "entity_id": "12", - "whitelist": ".*", - "num.producers": "2", - "num.streams": "2", - "producer.config": "migration_tool_testsuite/config/migration_producer.properties", - "consumer.config": "migration_tool_testsuite/config/migration_consumer.properties", - "zkclient.01.jar": "migration_tool_testsuite/0.7/lib/zkclient-0.1.jar", - "kafka.07.jar" : "migration_tool_testsuite/0.7/lib/kafka-0.7.0.jar", - "log_filename": "migration_tool_12.log", - "config_filename": "migration_tool_12.properties" - }, - { - "entity_id": "13", - "whitelist": ".*", - "num.producers": "2", - "num.streams": "2", - "producer.config": "migration_tool_testsuite/config/migration_producer.properties", - "consumer.config": "migration_tool_testsuite/config/migration_consumer.properties", - "zkclient.01.jar": "migration_tool_testsuite/0.7/lib/zkclient-0.1.jar", - "kafka.07.jar" : "migration_tool_testsuite/0.7/lib/kafka-0.7.0.jar", - "log_filename": "migration_tool_13.log", - "config_filename": "migration_tool_13.properties" - } - ] -} diff --git a/system_test/mirror_maker/README b/system_test/mirror_maker/README deleted file mode 100644 index da53c14446162..0000000000000 --- a/system_test/mirror_maker/README +++ /dev/null @@ -1,22 +0,0 @@ -This test replicates messages from two source kafka clusters into one target -kafka cluster using the mirror-maker tool. At the end, the messages produced -at the source brokers should match that at the target brokers. 
- -To run this test, do -bin/run-test.sh - -In the event of failure, by default the brokers and zookeepers remain running -to make it easier to debug the issue - hit Ctrl-C to shut them down. You can -change this behavior by setting the action_on_fail flag in the script to "exit" -or "proceed", in which case a snapshot of all the logs and directories is -placed in the test's base directory. - -It is a good idea to run the test in a loop. E.g.: - -:>/tmp/mirrormaker_test.log -for i in {1..10}; do echo "run $i"; ./bin/run-test.sh >> /tmp/mirrormaker_test.log 2>&1; done -tail -F /tmp/mirrormaker_test.log - -grep -ic passed /tmp/mirrormaker_test.log -grep -ic failed /tmp/mirrormaker_test.log - diff --git a/system_test/mirror_maker/bin/expected.out b/system_test/mirror_maker/bin/expected.out deleted file mode 100644 index 0a1bbafcac4a1..0000000000000 --- a/system_test/mirror_maker/bin/expected.out +++ /dev/null @@ -1,18 +0,0 @@ -start the servers ... -start producing messages ... -wait for consumer to finish consuming ... -[2011-05-17 14:49:11,605] INFO Creating async producer for broker id = 2 at localhost:9091 (kafka.producer.ProducerPool) -[2011-05-17 14:49:11,606] INFO Creating async producer for broker id = 1 at localhost:9092 (kafka.producer.ProducerPool) -[2011-05-17 14:49:11,607] INFO Creating async producer for broker id = 3 at localhost:9090 (kafka.producer.ProducerPool) -thread 0: 400000 messages sent 3514012.1233 nMsg/sec 3.3453 MBs/sec -[2011-05-17 14:49:34,382] INFO Closing all async producers (kafka.producer.ProducerPool) -[2011-05-17 14:49:34,383] INFO Closed AsyncProducer (kafka.producer.async.AsyncProducer) -[2011-05-17 14:49:34,384] INFO Closed AsyncProducer (kafka.producer.async.AsyncProducer) -[2011-05-17 14:49:34,385] INFO Closed AsyncProducer (kafka.producer.async.AsyncProducer) -Total Num Messages: 400000 bytes: 79859641 in 22.93 secs -Messages/sec: 17444.3960 -MB/sec: 3.3214 -test passed -stopping the servers -bin/../../../bin/zookeeper-server-start.sh: line 9: 22584 Terminated $(dirname $0)/kafka-run-class.sh org.apache.zookeeper.server.quorum.QuorumPeerMain $@ -bin/../../../bin/zookeeper-server-start.sh: line 9: 22585 Terminated $(dirname $0)/kafka-run-class.sh org.apache.zookeeper.server.quorum.QuorumPeerMain $@ diff --git a/system_test/mirror_maker/bin/run-test.sh b/system_test/mirror_maker/bin/run-test.sh deleted file mode 100644 index e5e6c08f2c153..0000000000000 --- a/system_test/mirror_maker/bin/run-test.sh +++ /dev/null @@ -1,357 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -readonly num_messages=10000 -readonly message_size=100 -readonly action_on_fail="proceed" -# readonly action_on_fail="wait" - -readonly test_start_time="$(date +%s)" - -readonly base_dir=$(dirname $0)/.. 
- -info() { - echo -e "$(date +"%Y-%m-%d %H:%M:%S") $*" -} - -kill_child_processes() { - isTopmost=$1 - curPid=$2 - childPids=$(ps a -o pid= -o ppid= | grep "${curPid}$" | awk '{print $1;}') - for childPid in $childPids - do - kill_child_processes 0 $childPid - done - if [ $isTopmost -eq 0 ]; then - kill -15 $curPid 2> /dev/null - fi -} - -cleanup() { - info "cleaning up" - - pid_zk_source1= - pid_zk_source2= - pid_zk_target= - pid_kafka_source_1_1= - pid_kafka_source_1_2= - pid_kafka_source_2_1= - pid_kafka_source_2_2= - pid_kafka_target_1_1= - pid_kafka_target_1_2= - pid_producer= - pid_mirrormaker_1= - pid_mirrormaker_2= - - rm -rf /tmp/zookeeper* - - rm -rf /tmp/kafka* -} - -begin_timer() { - t_begin=$(date +%s) -} - -end_timer() { - t_end=$(date +%s) -} - -start_zk() { - info "starting zookeepers" - $base_dir/../../bin/zookeeper-server-start.sh $base_dir/config/zookeeper_source_1.properties 2>&1 > $base_dir/zookeeper_source-1.log & - pid_zk_source1=$! - $base_dir/../../bin/zookeeper-server-start.sh $base_dir/config/zookeeper_source_2.properties 2>&1 > $base_dir/zookeeper_source-2.log & - pid_zk_source2=$! - $base_dir/../../bin/zookeeper-server-start.sh $base_dir/config/zookeeper_target.properties 2>&1 > $base_dir/zookeeper_target.log & - pid_zk_target=$! -} - -start_source_servers() { - info "starting source cluster" - - JMX_PORT=1111 $base_dir/../../bin/kafka-run-class.sh kafka.Kafka $base_dir/config/server_source_1_1.properties 2>&1 > $base_dir/kafka_source-1-1.log & - pid_kafka_source_1_1=$! - JMX_PORT=2222 $base_dir/../../bin/kafka-run-class.sh kafka.Kafka $base_dir/config/server_source_1_2.properties 2>&1 > $base_dir/kafka_source-1-2.log & - pid_kafka_source_1_2=$! - JMX_PORT=3333 $base_dir/../../bin/kafka-run-class.sh kafka.Kafka $base_dir/config/server_source_2_1.properties 2>&1 > $base_dir/kafka_source-2-1.log & - pid_kafka_source_2_1=$! - JMX_PORT=4444 $base_dir/../../bin/kafka-run-class.sh kafka.Kafka $base_dir/config/server_source_2_2.properties 2>&1 > $base_dir/kafka_source-2-2.log & - pid_kafka_source_2_2=$! -} - -start_target_servers() { - info "starting mirror cluster" - JMX_PORT=5555 $base_dir/../../bin/kafka-run-class.sh kafka.Kafka $base_dir/config/server_target_1_1.properties 2>&1 > $base_dir/kafka_target-1-1.log & - pid_kafka_target_1_1=$! - JMX_PORT=6666 $base_dir/../../bin/kafka-run-class.sh kafka.Kafka $base_dir/config/server_target_1_2.properties 2>&1 > $base_dir/kafka_target-1-2.log & - pid_kafka_target_1_2=$! 
-} - -shutdown_servers() { - info "stopping mirror-maker" - if [ "x${pid_mirrormaker_1}" != "x" ]; then kill_child_processes 0 ${pid_mirrormaker_1}; fi - # sleep to avoid rebalancing during shutdown - sleep 2 - if [ "x${pid_mirrormaker_2}" != "x" ]; then kill_child_processes 0 ${pid_mirrormaker_2}; fi - - info "stopping producer" - if [ "x${pid_producer}" != "x" ]; then kill_child_processes 0 ${pid_producer}; fi - - info "shutting down target servers" - if [ "x${pid_kafka_target_1_1}" != "x" ]; then kill_child_processes 0 ${pid_kafka_target_1_1}; fi - if [ "x${pid_kafka_target_1_2}" != "x" ]; then kill_child_processes 0 ${pid_kafka_target_1_2}; fi - sleep 2 - - info "shutting down source servers" - if [ "x${pid_kafka_source_1_1}" != "x" ]; then kill_child_processes 0 ${pid_kafka_source_1_1}; fi - if [ "x${pid_kafka_source_1_2}" != "x" ]; then kill_child_processes 0 ${pid_kafka_source_1_2}; fi - if [ "x${pid_kafka_source_2_1}" != "x" ]; then kill_child_processes 0 ${pid_kafka_source_2_1}; fi - if [ "x${pid_kafka_source_2_2}" != "x" ]; then kill_child_processes 0 ${pid_kafka_source_2_2}; fi - - info "shutting down zookeeper servers" - if [ "x${pid_zk_target}" != "x" ]; then kill_child_processes 0 ${pid_zk_target}; fi - if [ "x${pid_zk_source1}" != "x" ]; then kill_child_processes 0 ${pid_zk_source1}; fi - if [ "x${pid_zk_source2}" != "x" ]; then kill_child_processes 0 ${pid_zk_source2}; fi -} - -start_producer() { - topic=$1 - zk=$2 - info "start producing messages for topic $topic to zookeeper $zk ..." - $base_dir/../../bin/kafka-run-class.sh kafka.perf.ProducerPerformance --brokerinfo zk.connect=$zk --topics $topic --messages $num_messages --message-size $message_size --batch-size 200 --vary-message-size --threads 1 --reporting-interval $num_messages --async 2>&1 > $base_dir/producer_performance.log & - pid_producer=$! 
-} - -# Usage: wait_partition_done ([kafka-server] [topic] [partition-id])+ -wait_partition_done() { - n_tuples=$(($# / 3)) - - i=1 - while (($#)); do - kafka_server[i]=$1 - topic[i]=$2 - partitionid[i]=$3 - prev_offset[i]=0 - info "\twaiting for partition on server ${kafka_server[i]}, topic ${topic[i]}, partition ${partitionid[i]}" - i=$((i+1)) - shift 3 - done - - all_done=0 - - # set -x - while [[ $all_done != 1 ]]; do - sleep 4 - i=$n_tuples - all_done=1 - for ((i=1; i <= $n_tuples; i++)); do - cur_size=$($base_dir/../../bin/kafka-run-class.sh kafka.tools.GetOffsetShell --server ${kafka_server[i]} --topic ${topic[i]} --partition ${partitionid[i]} --time -1 --offsets 1 | tail -1) - if [ "x$cur_size" != "x${prev_offset[i]}" ]; then - all_done=0 - prev_offset[i]=$cur_size - fi - done - done - -} - -cmp_logs() { - topic=$1 - info "comparing source and target logs for topic $topic" - source_part0_size=$($base_dir/../../bin/kafka-run-class.sh kafka.tools.GetOffsetShell --server kafka://localhost:9090 --topic $topic --partition 0 --time -1 --offsets 1 | tail -1) - source_part1_size=$($base_dir/../../bin/kafka-run-class.sh kafka.tools.GetOffsetShell --server kafka://localhost:9091 --topic $topic --partition 0 --time -1 --offsets 1 | tail -1) - source_part2_size=$($base_dir/../../bin/kafka-run-class.sh kafka.tools.GetOffsetShell --server kafka://localhost:9092 --topic $topic --partition 0 --time -1 --offsets 1 | tail -1) - source_part3_size=$($base_dir/../../bin/kafka-run-class.sh kafka.tools.GetOffsetShell --server kafka://localhost:9093 --topic $topic --partition 0 --time -1 --offsets 1 | tail -1) - target_part0_size=$($base_dir/../../bin/kafka-run-class.sh kafka.tools.GetOffsetShell --server kafka://localhost:9094 --topic $topic --partition 0 --time -1 --offsets 1 | tail -1) - target_part1_size=$($base_dir/../../bin/kafka-run-class.sh kafka.tools.GetOffsetShell --server kafka://localhost:9095 --topic $topic --partition 0 --time -1 --offsets 1 | tail -1) - if [ "x$source_part0_size" == "x" ]; then source_part0_size=0; fi - if [ "x$source_part1_size" == "x" ]; then source_part1_size=0; fi - if [ "x$source_part2_size" == "x" ]; then source_part2_size=0; fi - if [ "x$source_part3_size" == "x" ]; then source_part3_size=0; fi - if [ "x$target_part0_size" == "x" ]; then target_part0_size=0; fi - if [ "x$target_part1_size" == "x" ]; then target_part1_size=0; fi - expected_size=$(($source_part0_size + $source_part1_size + $source_part2_size + $source_part3_size)) - actual_size=$(($target_part0_size + $target_part1_size)) - if [ "x$expected_size" != "x$actual_size" ] - then - info "source size: $expected_size target size: $actual_size" - return 1 - else - return 0 - fi -} - -take_fail_snapshot() { - snapshot_dir="$base_dir/failed-${snapshot_prefix}-${test_start_time}" - mkdir $snapshot_dir - for dir in /tmp/zookeeper_source{1..2} /tmp/zookeeper_target /tmp/kafka-source-{1..2}-{1..2}-logs /tmp/kafka-target{1..2}-logs; do - if [ -d $dir ]; then - cp -r $dir $snapshot_dir - fi - done -} - -# Usage: process_test_result -# result: last test result -# action_on_fail: (exit|wait|proceed) -# ("wait" is useful if you want to troubleshoot using zookeeper) -process_test_result() { - result=$1 - if [ $1 -eq 0 ]; then - info "test passed" - else - info "test failed" - case "$2" in - "wait") info "waiting: hit Ctrl-c to quit" - wait - ;; - "exit") shutdown_servers - take_fail_snapshot - exit $result - ;; - *) shutdown_servers - take_fail_snapshot - info "proceeding" - ;; - esac - fi -} - -test_whitelists() { - info 
"### Testing whitelists" - snapshot_prefix="whitelist-test" - - cleanup - start_zk - start_source_servers - start_target_servers - sleep 4 - - info "starting mirror makers" - JMX_PORT=7777 $base_dir/../../bin/kafka-run-class.sh kafka.tools.MirrorMaker --consumer.config $base_dir/config/whitelisttest_1.consumer.properties --consumer.config $base_dir/config/whitelisttest_2.consumer.properties --producer.config $base_dir/config/mirror_producer.properties --whitelist="white.*" --num.streams 2 2>&1 > $base_dir/kafka_mirrormaker_1.log & - pid_mirrormaker_1=$! - JMX_PORT=8888 $base_dir/../../bin/kafka-run-class.sh kafka.tools.MirrorMaker --consumer.config $base_dir/config/whitelisttest_1.consumer.properties --consumer.config $base_dir/config/whitelisttest_2.consumer.properties --producer.config $base_dir/config/mirror_producer.properties --whitelist="white.*" --num.streams 2 2>&1 > $base_dir/kafka_mirrormaker_2.log & - pid_mirrormaker_2=$! - - begin_timer - - start_producer whitetopic01 localhost:2181 - start_producer whitetopic01 localhost:2182 - info "waiting for whitetopic01 producers to finish producing ..." - wait_partition_done kafka://localhost:9090 whitetopic01 0 kafka://localhost:9091 whitetopic01 0 kafka://localhost:9092 whitetopic01 0 kafka://localhost:9093 whitetopic01 0 - - start_producer whitetopic02 localhost:2181 - start_producer whitetopic03 localhost:2181 - start_producer whitetopic04 localhost:2182 - info "waiting for whitetopic02,whitetopic03,whitetopic04 producers to finish producing ..." - wait_partition_done kafka://localhost:9090 whitetopic02 0 kafka://localhost:9091 whitetopic02 0 kafka://localhost:9090 whitetopic03 0 kafka://localhost:9091 whitetopic03 0 kafka://localhost:9092 whitetopic04 0 kafka://localhost:9093 whitetopic04 0 - - start_producer blacktopic01 localhost:2182 - info "waiting for blacktopic01 producer to finish producing ..." - wait_partition_done kafka://localhost:9092 blacktopic01 0 kafka://localhost:9093 blacktopic01 0 - - info "waiting for consumer to finish consuming ..." - - wait_partition_done kafka://localhost:9094 whitetopic01 0 kafka://localhost:9095 whitetopic01 0 kafka://localhost:9094 whitetopic02 0 kafka://localhost:9095 whitetopic02 0 kafka://localhost:9094 whitetopic03 0 kafka://localhost:9095 whitetopic03 0 kafka://localhost:9094 whitetopic04 0 kafka://localhost:9095 whitetopic04 0 - - end_timer - info "embedded consumer took $((t_end - t_begin)) seconds" - - sleep 2 - - # if [[ -d /tmp/kafka-target-1-1-logs/blacktopic01 || /tmp/kafka-target-1-2-logs/blacktopic01 ]]; then - # echo "blacktopic01 found on target cluster" - # result=1 - # else - # cmp_logs whitetopic01 && cmp_logs whitetopic02 && cmp_logs whitetopic03 && cmp_logs whitetopic04 - # result=$? - # fi - - cmp_logs blacktopic01 - - cmp_logs whitetopic01 && cmp_logs whitetopic02 && cmp_logs whitetopic03 && cmp_logs whitetopic04 - result=$? - - return $result -} - -test_blacklists() { - info "### Testing blacklists" - snapshot_prefix="blacklist-test" - cleanup - start_zk - start_source_servers - start_target_servers - sleep 4 - - info "starting mirror maker" - $base_dir/../../bin/kafka-run-class.sh kafka.tools.MirrorMaker --consumer.config $base_dir/config/blacklisttest.consumer.properties --producer.config $base_dir/config/mirror_producer.properties --blacklist="black.*" --num.streams 2 2>&1 > $base_dir/kafka_mirrormaker_1.log & - pid_mirrormaker_1=$! 
- - start_producer blacktopic01 localhost:2181 - start_producer blacktopic02 localhost:2181 - info "waiting for producer to finish producing blacktopic01,blacktopic02 ..." - wait_partition_done kafka://localhost:9090 blacktopic01 0 kafka://localhost:9091 blacktopic01 0 kafka://localhost:9090 blacktopic02 0 kafka://localhost:9091 blacktopic02 0 - - begin_timer - - start_producer whitetopic01 localhost:2181 - info "waiting for producer to finish producing whitetopic01 ..." - wait_partition_done kafka://localhost:9090 whitetopic01 0 kafka://localhost:9091 whitetopic01 0 - - info "waiting for consumer to finish consuming ..." - wait_partition_done kafka://localhost:9094 whitetopic01 0 kafka://localhost:9095 whitetopic01 0 - - end_timer - - info "embedded consumer took $((t_end - t_begin)) seconds" - - sleep 2 - - cmp_logs blacktopic01 || cmp_logs blacktopic02 - if [ $? -eq 0 ]; then - return 1 - fi - - cmp_logs whitetopic01 - return $? -} - -# main test begins - -echo "Test-$test_start_time" - -# Ctrl-c trap. Catches INT signal -trap "shutdown_servers; exit 0" INT - -test_whitelists -result=$? - -process_test_result $result $action_on_fail - -shutdown_servers - -sleep 2 - -test_blacklists -result=$? - -process_test_result $result $action_on_fail - -shutdown_servers - -exit $result - diff --git a/system_test/mirror_maker/config/blacklisttest.consumer.properties b/system_test/mirror_maker/config/blacklisttest.consumer.properties deleted file mode 100644 index ff1201582ff1c..0000000000000 --- a/system_test/mirror_maker/config/blacklisttest.consumer.properties +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.consumer.ConsumerConfig for more details - -# zk connection string -# comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -zk.connect=localhost:2181 - -# timeout in ms for connecting to zookeeper -zk.connection.timeout.ms=1000000 - -#consumer group id -group.id=group1 -shallow.iterator.enable=true - diff --git a/system_test/mirror_maker/config/mirror_producer.properties b/system_test/mirror_maker/config/mirror_producer.properties deleted file mode 100644 index aa8be6504a7a4..0000000000000 --- a/system_test/mirror_maker/config/mirror_producer.properties +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# zk connection string -# comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -zk.connect=localhost:2183 -# broker.list=1:localhost:9094,2:localhost:9095 - -# timeout in ms for connecting to zookeeper -# zk.connection.timeout.ms=1000000 - -producer.type=async - -# to avoid dropping events if the queue is full, wait indefinitely -queue.enqueue.timeout.ms=-1 - -num.producers.per.broker=2 - diff --git a/system_test/mirror_maker/config/server_source_1_1.properties b/system_test/mirror_maker/config/server_source_1_1.properties deleted file mode 100644 index 2f070a74e8bd6..0000000000000 --- a/system_test/mirror_maker/config/server_source_1_1.properties +++ /dev/null @@ -1,76 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -# the id of the broker -broker.id=1 - -# hostname of broker. If not set, will pick up from the value returned -# from getLocalHost. If there are multiple interfaces getLocalHost -# may not be what you want. -# host.name= - -# number of logical partitions on this broker -num.partitions=1 - -# the port the socket server runs on -port=9090 - -# the number of processor threads the socket server uses. Defaults to the number of cores on the machine -num.threads=8 - -# the directory in which to store log files -log.dir=/tmp/kafka-source-1-1-logs - -# the send buffer used by the socket server -socket.send.buffer.bytes=1048576 - -# the receive buffer used by the socket server -socket.receive.buffer.bytes=1048576 - -# the maximum size of a log segment -log.segment.bytes=10000000 - -# the interval between running cleanup on the logs -log.cleanup.interval.mins=1 - -# the minimum age of a log file to eligible for deletion -log.retention.hours=168 - -#the number of messages to accept without flushing the log to disk -log.flush.interval.messages=600 - -#set the following properties to use zookeeper - -# enable connecting to zookeeper -enable.zookeeper=true - -# zk connection string -# comma separated host:port pairs, each corresponding to a zk -# server. e.g. 
"127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -zk.connect=localhost:2181 - -# timeout in ms for connecting to zookeeper -zk.connection.timeout.ms=1000000 - -# time based topic flush intervals in ms -#log.flush.intervals.ms.per.topic=topic:1000 - -# default time based flush interval in ms -log.flush.interval.ms=1000 - -# time based topic flasher time rate in ms -log.flush.scheduler.interval.ms=1000 - diff --git a/system_test/mirror_maker/config/server_source_1_2.properties b/system_test/mirror_maker/config/server_source_1_2.properties deleted file mode 100644 index f9353e8934dcb..0000000000000 --- a/system_test/mirror_maker/config/server_source_1_2.properties +++ /dev/null @@ -1,76 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -# the id of the broker -broker.id=2 - -# hostname of broker. If not set, will pick up from the value returned -# from getLocalHost. If there are multiple interfaces getLocalHost -# may not be what you want. -# host.name= - -# number of logical partitions on this broker -num.partitions=1 - -# the port the socket server runs on -port=9091 - -# the number of processor threads the socket server uses. Defaults to the number of cores on the machine -num.threads=8 - -# the directory in which to store log files -log.dir=/tmp/kafka-source-1-2-logs - -# the send buffer used by the socket server -socket.send.buffer.bytes=1048576 - -# the receive buffer used by the socket server -socket.receive.buffer.bytes=1048576 - -# the maximum size of a log segment -log.segment.bytes=536870912 - -# the interval between running cleanup on the logs -log.cleanup.interval.mins=1 - -# the minimum age of a log file to eligible for deletion -log.retention.hours=168 - -#the number of messages to accept without flushing the log to disk -log.flush.interval.messages=600 - -#set the following properties to use zookeeper - -# enable connecting to zookeeper -enable.zookeeper=true - -# zk connection string -# comma separated host:port pairs, each corresponding to a zk -# server. e.g. 
"127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -zk.connect=localhost:2181 - -# timeout in ms for connecting to zookeeper -zk.connection.timeout.ms=1000000 - -# time based topic flush intervals in ms -#log.flush.intervals.ms.per.topic=topic:1000 - -# default time based flush interval in ms -log.flush.interval.ms=1000 - -# time based topic flasher time rate in ms -log.flush.scheduler.interval.ms=1000 - diff --git a/system_test/mirror_maker/config/server_source_2_1.properties b/system_test/mirror_maker/config/server_source_2_1.properties deleted file mode 100644 index daa01ad463ec1..0000000000000 --- a/system_test/mirror_maker/config/server_source_2_1.properties +++ /dev/null @@ -1,76 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -# the id of the broker -broker.id=1 - -# hostname of broker. If not set, will pick up from the value returned -# from getLocalHost. If there are multiple interfaces getLocalHost -# may not be what you want. -# host.name= - -# number of logical partitions on this broker -num.partitions=1 - -# the port the socket server runs on -port=9092 - -# the number of processor threads the socket server uses. Defaults to the number of cores on the machine -num.threads=8 - -# the directory in which to store log files -log.dir=/tmp/kafka-source-2-1-logs - -# the send buffer used by the socket server -socket.send.buffer.bytes=1048576 - -# the receive buffer used by the socket server -socket.receive.buffer.bytes=1048576 - -# the maximum size of a log segment -log.segment.bytes=536870912 - -# the interval between running cleanup on the logs -log.cleanup.interval.mins=1 - -# the minimum age of a log file to eligible for deletion -log.retention.hours=168 - -#the number of messages to accept without flushing the log to disk -log.flush.interval.messages=600 - -#set the following properties to use zookeeper - -# enable connecting to zookeeper -enable.zookeeper=true - -# zk connection string -# comma separated host:port pairs, each corresponding to a zk -# server. e.g. 
"127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -zk.connect=localhost:2182 - -# timeout in ms for connecting to zookeeper -zk.connection.timeout.ms=1000000 - -# time based topic flush intervals in ms -#log.flush.intervals.ms.per.topic=topic:1000 - -# default time based flush interval in ms -log.flush.interval.ms=1000 - -# time based topic flasher time rate in ms -log.flush.scheduler.interval.ms=1000 - diff --git a/system_test/mirror_maker/config/server_source_2_2.properties b/system_test/mirror_maker/config/server_source_2_2.properties deleted file mode 100644 index be6fdfcc11dca..0000000000000 --- a/system_test/mirror_maker/config/server_source_2_2.properties +++ /dev/null @@ -1,76 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -# the id of the broker -broker.id=2 - -# hostname of broker. If not set, will pick up from the value returned -# from getLocalHost. If there are multiple interfaces getLocalHost -# may not be what you want. -# host.name= - -# number of logical partitions on this broker -num.partitions=1 - -# the port the socket server runs on -port=9093 - -# the number of processor threads the socket server uses. Defaults to the number of cores on the machine -num.threads=8 - -# the directory in which to store log files -log.dir=/tmp/kafka-source-2-2-logs - -# the send buffer used by the socket server -socket.send.buffer.bytes=1048576 - -# the receive buffer used by the socket server -socket.receive.buffer.bytes=1048576 - -# the maximum size of a log segment -log.segment.bytes=536870912 - -# the interval between running cleanup on the logs -log.cleanup.interval.mins=1 - -# the minimum age of a log file to eligible for deletion -log.retention.hours=168 - -#the number of messages to accept without flushing the log to disk -log.flush.interval.messages=600 - -#set the following properties to use zookeeper - -# enable connecting to zookeeper -enable.zookeeper=true - -# zk connection string -# comma separated host:port pairs, each corresponding to a zk -# server. e.g. 
"127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -zk.connect=localhost:2182 - -# timeout in ms for connecting to zookeeper -zk.connection.timeout.ms=1000000 - -# time based topic flush intervals in ms -#log.flush.intervals.ms.per.topic=topic:1000 - -# default time based flush interval in ms -log.flush.interval.ms=1000 - -# time based topic flasher time rate in ms -log.flush.scheduler.interval.ms=1000 - diff --git a/system_test/mirror_maker/config/server_target_1_1.properties b/system_test/mirror_maker/config/server_target_1_1.properties deleted file mode 100644 index d37955a66dcde..0000000000000 --- a/system_test/mirror_maker/config/server_target_1_1.properties +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -# the id of the broker -broker.id=1 - -# hostname of broker. If not set, will pick up from the value returned -# from getLocalHost. If there are multiple interfaces getLocalHost -# may not be what you want. -# host.name= - -# number of logical partitions on this broker -num.partitions=1 - -# the port the socket server runs on -port=9094 - -# the number of processor threads the socket server uses. Defaults to the number of cores on the machine -num.threads=8 - -# the directory in which to store log files -log.dir=/tmp/kafka-target-1-1-logs - -# the send buffer used by the socket server -socket.send.buffer.bytes=1048576 - -# the receive buffer used by the socket server -socket.receive.buffer.bytes=1048576 - -# the maximum size of a log segment -log.segment.bytes=536870912 - -# the interval between running cleanup on the logs -log.cleanup.interval.mins=1 - -# the minimum age of a log file to eligible for deletion -log.retention.hours=168 - -#the number of messages to accept without flushing the log to disk -log.flush.interval.messages=600 - -#set the following properties to use zookeeper - -# enable connecting to zookeeper -enable.zookeeper=true - -# zk connection string -# comma separated host:port pairs, each corresponding to a zk -# server. e.g. 
"127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -zk.connect=localhost:2183 - -# timeout in ms for connecting to zookeeper -zk.connection.timeout.ms=1000000 - -# time based topic flush intervals in ms -#log.flush.intervals.ms.per.topic=topic:1000 - -# default time based flush interval in ms -log.flush.interval.ms=1000 - -# time based topic flasher time rate in ms -log.flush.scheduler.interval.ms=1000 - -# topic partition count map -# topic.partition.count.map=topic1:3, topic2:4 diff --git a/system_test/mirror_maker/config/server_target_1_2.properties b/system_test/mirror_maker/config/server_target_1_2.properties deleted file mode 100644 index aa7546cfc8f5a..0000000000000 --- a/system_test/mirror_maker/config/server_target_1_2.properties +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.server.KafkaConfig for additional details and defaults - -# the id of the broker -broker.id=2 - -# hostname of broker. If not set, will pick up from the value returned -# from getLocalHost. If there are multiple interfaces getLocalHost -# may not be what you want. -# host.name= - -# number of logical partitions on this broker -num.partitions=1 - -# the port the socket server runs on -port=9095 - -# the number of processor threads the socket server uses. Defaults to the number of cores on the machine -num.threads=8 - -# the directory in which to store log files -log.dir=/tmp/kafka-target-1-2-logs - -# the send buffer used by the socket server -socket.send.buffer.bytes=1048576 - -# the receive buffer used by the socket server -socket.receive.buffer.bytes=1048576 - -# the maximum size of a log segment -log.segment.bytes=536870912 - -# the interval between running cleanup on the logs -log.cleanup.interval.mins=1 - -# the minimum age of a log file to eligible for deletion -log.retention.hours=168 - -#the number of messages to accept without flushing the log to disk -log.flush.interval.messages=600 - -#set the following properties to use zookeeper - -# enable connecting to zookeeper -enable.zookeeper=true - -# zk connection string -# comma separated host:port pairs, each corresponding to a zk -# server. e.g. 
"127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -zk.connect=localhost:2183 - -# timeout in ms for connecting to zookeeper -zk.connection.timeout.ms=1000000 - -# time based topic flush intervals in ms -#log.flush.intervals.ms.per.topic=topic:1000 - -# default time based flush interval in ms -log.flush.interval.ms=1000 - -# time based topic flasher time rate in ms -log.flush.scheduler.interval.ms=1000 - -# topic partition count map -# topic.partition.count.map=topic1:3, topic2:4 diff --git a/system_test/mirror_maker/config/whitelisttest_1.consumer.properties b/system_test/mirror_maker/config/whitelisttest_1.consumer.properties deleted file mode 100644 index ff1201582ff1c..0000000000000 --- a/system_test/mirror_maker/config/whitelisttest_1.consumer.properties +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.consumer.ConsumerConfig for more details - -# zk connection string -# comma separated host:port pairs, each corresponding to a zk -# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -zk.connect=localhost:2181 - -# timeout in ms for connecting to zookeeper -zk.connection.timeout.ms=1000000 - -#consumer group id -group.id=group1 -shallow.iterator.enable=true - diff --git a/system_test/mirror_maker/config/whitelisttest_2.consumer.properties b/system_test/mirror_maker/config/whitelisttest_2.consumer.properties deleted file mode 100644 index f1a902b100ad5..0000000000000 --- a/system_test/mirror_maker/config/whitelisttest_2.consumer.properties +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# see kafka.consumer.ConsumerConfig for more details - -# zk connection string -# comma separated host:port pairs, each corresponding to a zk -# server. e.g. 
"127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -zk.connect=localhost:2182 - -# timeout in ms for connecting to zookeeper -zk.connection.timeout.ms=1000000 - -#consumer group id -group.id=group1 -shallow.iterator.enable=true - diff --git a/system_test/mirror_maker/config/zookeeper_source_1.properties b/system_test/mirror_maker/config/zookeeper_source_1.properties deleted file mode 100644 index f85179616ebba..0000000000000 --- a/system_test/mirror_maker/config/zookeeper_source_1.properties +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. -dataDir=/tmp/zookeeper_source-1 -# the port at which the clients will connect -clientPort=2181 diff --git a/system_test/mirror_maker/config/zookeeper_target.properties b/system_test/mirror_maker/config/zookeeper_target.properties deleted file mode 100644 index 55a7eb189d642..0000000000000 --- a/system_test/mirror_maker/config/zookeeper_target.properties +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# the directory where the snapshot is stored. 
-dataDir=/tmp/zookeeper_target -# the port at which the clients will connect -clientPort=2183 diff --git a/system_test/mirror_maker_testsuite/config/mirror_producer.properties b/system_test/mirror_maker_testsuite/config/mirror_producer.properties index b2bf2c23b6b39..f94bebd3f5a05 100644 --- a/system_test/mirror_maker_testsuite/config/mirror_producer.properties +++ b/system_test/mirror_maker_testsuite/config/mirror_producer.properties @@ -1,6 +1,12 @@ -producer.type=async -queue.enqueue.timeout.ms=-1 +# old producer metadata.broker.list=localhost:9094 compression.codec=0 -message.send.max.retries=3 +request.retries=3 request.required.acks=1 + +# new producer +block.on.buffer.full=true +bootstrap.servers=localhost:9094 +compression.type=none +retries=3 +acks=1 diff --git a/system_test/mirror_maker_testsuite/mirror_maker_test.py b/system_test/mirror_maker_testsuite/mirror_maker_test.py index dfffb4e977ef2..48f9ff6b2810f 100644 --- a/system_test/mirror_maker_testsuite/mirror_maker_test.py +++ b/system_test/mirror_maker_testsuite/mirror_maker_test.py @@ -166,7 +166,7 @@ def runTest(self): time.sleep(5) self.log_message("creating topics") - kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv) + kafka_system_test_utils.create_topic_for_producer_performance(self.systemTestEnv, self.testcaseEnv) self.anonLogger.info("sleeping for 5s") time.sleep(5) @@ -248,6 +248,7 @@ def runTest(self): str(self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]) + "]", extra=self.d) if self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]: time.sleep(1) + self.testcaseEnv.lock.release() self.logger.info("all producer threads completed", extra=self.d) break time.sleep(1) @@ -314,6 +315,7 @@ def runTest(self): except Exception as e: self.log_message("Exception while running test {0}".format(e)) traceback.print_exc() + self.testcaseEnv.validationStatusDict["Test completed"] = "FAILED" finally: if not skipThisTestCase and not self.systemTestEnv.printTestDescriptionsOnly: diff --git a/system_test/mirror_maker_testsuite/testcase_15001/testcase_15001_properties.json b/system_test/mirror_maker_testsuite/testcase_15001/testcase_15001_properties.json new file mode 100644 index 0000000000000..9dd3477e70a53 --- /dev/null +++ b/system_test/mirror_maker_testsuite/testcase_15001/testcase_15001_properties.json @@ -0,0 +1,158 @@ +{ + "description": {"01":"To Test : 'Replication with Mirror Maker'", + "02":"Set up 2 clusters such as : SOURCE => MirrorMaker => TARGET", + "03":"Set up 2-node Zk cluster for both SOURCE & TARGET", + "04":"Produce and consume messages to a single topic - single partition.", + "05":"This test sends messages to 3 replicas", + "06":"At the end it verifies the log size and contents", + "07":"Use a consumer to verify no message loss in TARGET cluster.", + "08":"Producer dimensions : mode:sync, acks:-1, comp:0", + "09":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "false", + "bounce_mirror_maker": "false", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2108", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_0.log", + "config_filename": "zookeeper_0.properties" + }, + { + "entity_id": "1", + "clientPort": "2118", + "dataDir": "/tmp/zookeeper_1", + "log_filename": "zookeeper_1.log", + 
"config_filename": "zookeeper_1.properties" + }, + + { + "entity_id": "2", + "clientPort": "2128", + "dataDir": "/tmp/zookeeper_2", + "log_filename": "zookeeper_2.log", + "config_filename": "zookeeper_2.properties" + }, + { + "entity_id": "3", + "clientPort": "2138", + "dataDir": "/tmp/zookeeper_3", + "log_filename": "zookeeper_3.log", + "config_filename": "zookeeper_3.properties" + }, + + { + "entity_id": "4", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_4_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_4.log", + "config_filename": "kafka_server_4.properties" + }, + { + "entity_id": "5", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_5_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_5.log", + "config_filename": "kafka_server_5.properties" + }, + { + "entity_id": "6", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_6_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_6.log", + "config_filename": "kafka_server_6.properties" + }, + { + "entity_id": "7", + "port": "9094", + "broker.id": "4", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_7_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_7.log", + "config_filename": "kafka_server_7.properties" + }, + { + "entity_id": "8", + "port": "9095", + "broker.id": "5", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_8_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_8.log", + "config_filename": "kafka_server_8.properties" + }, + { + "entity_id": "9", + "port": "9096", + "broker.id": "6", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_9_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_9.log", + "config_filename": "kafka_server_9.properties" + }, + + { + "entity_id": "10", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "500", + "request-num-acks": "-1", + "sync":"true", + "producer-num-retries":"5", + "log_filename": "producer_performance_10.log", + "config_filename": "producer_performance_10.properties" + }, + { + "entity_id": "11", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_11.log", + "config_filename": "console_consumer_11.properties" + }, + + { + "entity_id": "12", + "log_filename": "mirror_maker_12.log", + "mirror_consumer_config_filename": "mirror_consumer_12.properties", + "mirror_producer_config_filename": "mirror_producer_12.properties" + }, + + { + "entity_id": "13", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_13.log", + "config_filename": "console_consumer_13.properties" + } + ] +} diff --git a/system_test/mirror_maker_testsuite/testcase_15002/testcase_15002_properties.json b/system_test/mirror_maker_testsuite/testcase_15002/testcase_15002_properties.json new file mode 100644 index 0000000000000..d6495e54d6845 --- /dev/null +++ b/system_test/mirror_maker_testsuite/testcase_15002/testcase_15002_properties.json @@ -0,0 +1,158 @@ +{ + "description": {"01":"Replication with Mirror Maker => Bounce Mirror Maker", + "02":"Set 
up 2 clusters such as : SOURCE => MirrorMaker => TARGET", + "03":"Set up 2-node Zk cluster for both SOURCE & TARGET", + "04":"Produce and consume messages to a single topic - single partition.", + "05":"This test sends messages to 3 replicas", + "06":"At the end it verifies the log size and contents", + "07":"Use a consumer to verify no message loss in TARGET cluster.", + "08":"Producer dimensions : mode:sync, acks:-1, comp:0", + "09":"Log segment size : 20480" + }, + "testcase_args": { + "bounce_leader": "false", + "bounce_mirror_maker": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2108", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_0.log", + "config_filename": "zookeeper_0.properties" + }, + { + "entity_id": "1", + "clientPort": "2118", + "dataDir": "/tmp/zookeeper_1", + "log_filename": "zookeeper_1.log", + "config_filename": "zookeeper_1.properties" + }, + + { + "entity_id": "2", + "clientPort": "2128", + "dataDir": "/tmp/zookeeper_2", + "log_filename": "zookeeper_2.log", + "config_filename": "zookeeper_2.properties" + }, + { + "entity_id": "3", + "clientPort": "2138", + "dataDir": "/tmp/zookeeper_3", + "log_filename": "zookeeper_3.log", + "config_filename": "zookeeper_3.properties" + }, + + { + "entity_id": "4", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_4_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_4.log", + "config_filename": "kafka_server_4.properties" + }, + { + "entity_id": "5", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_5_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_5.log", + "config_filename": "kafka_server_5.properties" + }, + { + "entity_id": "6", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_6_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_6.log", + "config_filename": "kafka_server_6.properties" + }, + { + "entity_id": "7", + "port": "9094", + "broker.id": "4", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_7_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_7.log", + "config_filename": "kafka_server_7.properties" + }, + { + "entity_id": "8", + "port": "9095", + "broker.id": "5", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_8_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_8.log", + "config_filename": "kafka_server_8.properties" + }, + { + "entity_id": "9", + "port": "9096", + "broker.id": "6", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_9_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_9.log", + "config_filename": "kafka_server_9.properties" + }, + + { + "entity_id": "10", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "sync":"true", + "producer-num-retries":"5", + "log_filename": "producer_performance_10.log", + "config_filename": "producer_performance_10.properties" + }, + { + 
"entity_id": "11", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_11.log", + "config_filename": "console_consumer_11.properties" + }, + + { + "entity_id": "12", + "log_filename": "mirror_maker_12.log", + "mirror_consumer_config_filename": "mirror_consumer_12.properties", + "mirror_producer_config_filename": "mirror_producer_12.properties" + }, + + { + "entity_id": "13", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_13.log", + "config_filename": "console_consumer_13.properties" + } + ] +} diff --git a/system_test/migration_tool_testsuite/testcase_9006/cluster_config.json b/system_test/mirror_maker_testsuite/testcase_15003/cluster_config.json similarity index 71% rename from system_test/migration_tool_testsuite/testcase_9006/cluster_config.json rename to system_test/mirror_maker_testsuite/testcase_15003/cluster_config.json index 9fcb3b0400bd8..f6fe86787f1c3 100644 --- a/system_test/migration_tool_testsuite/testcase_9006/cluster_config.json +++ b/system_test/mirror_maker_testsuite/testcase_15003/cluster_config.json @@ -5,68 +5,67 @@ "hostname": "localhost", "role": "zookeeper", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9900" + "jmx_port": "9100" }, { "entity_id": "1", "hostname": "localhost", "role": "zookeeper", - "cluster_name":"target", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9901" + "jmx_port": "9101" }, - { "entity_id": "2", "hostname": "localhost", - "role": "broker", - "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "role": "zookeeper", + "cluster_name":"target", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9902" + "jmx_port": "9102" }, { "entity_id": "3", "hostname": "localhost", - "role": "broker", - "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "role": "zookeeper", + "cluster_name":"target", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9903" + "jmx_port": "9103" }, + { "entity_id": "4", "hostname": "localhost", "role": "broker", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9904" + "jmx_port": "9104" }, - - { "entity_id": "5", "hostname": "localhost", "role": "broker", - "cluster_name":"target", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9905" + "jmx_port": "9105" }, { "entity_id": "6", "hostname": "localhost", "role": "broker", - "cluster_name":"target", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9906" + "jmx_port": "9106" }, + { "entity_id": "7", "hostname": "localhost", @@ -74,38 +73,35 @@ "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9907" + "jmx_port": "9107" }, - - { "entity_id": "8", "hostname": "localhost", - "role": "producer_performance", - "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", - "java_home": "/export/apps/jdk/JDK-1_6_0_27", - "jmx_port": "9908" + "role": "broker", + "cluster_name":"target", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9108" }, { "entity_id": "9", "hostname": "localhost", - "role": "producer_performance", - 
"cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", - "java_home": "/export/apps/jdk/JDK-1_6_0_27", - "jmx_port": "9909" + "role": "broker", + "cluster_name":"target", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9109" }, - { "entity_id": "10", "hostname": "localhost", - "role": "console_consumer", - "cluster_name":"target", + "role": "producer_performance", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9910" + "jmx_port": "9110" }, { "entity_id": "11", @@ -114,28 +110,26 @@ "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9911" + "jmx_port": "9111" }, - { "entity_id": "12", "hostname": "localhost", - "role": "migration_tool", + "role": "mirror_maker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9912" + "jmx_port": "9112" }, { "entity_id": "13", "hostname": "localhost", - "role": "migration_tool", + "role": "mirror_maker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9913" + "jmx_port": "9113" } - ] } diff --git a/system_test/mirror_maker_testsuite/testcase_15003/testcase_15003_properties.json b/system_test/mirror_maker_testsuite/testcase_15003/testcase_15003_properties.json new file mode 100644 index 0000000000000..842c70eaf33fd --- /dev/null +++ b/system_test/mirror_maker_testsuite/testcase_15003/testcase_15003_properties.json @@ -0,0 +1,156 @@ +{ + "description": {"01":"Replication with Mirror Maker => Bounce Mirror Maker", + "02":"Set up 2 clusters such as : SOURCE => MirrorMaker => TARGET", + "03":"Set up 2-node Zk cluster for both SOURCE & TARGET", + "04":"Produce and consume messages to a single topic - single partition.", + "05":"This test sends messages to 3 replicas", + "06":"At the end it verifies the log size and contents", + "07":"Use a consumer to verify no message loss in TARGET cluster.", + "08":"Producer dimensions : mode:async, acks:-1, comp:1", + "09":"Log segment size : 20480" + }, + "testcase_args": { + "bounce_leader": "false", + "bounce_mirror_maker": "true", + "bounced_entity_downtime_sec": "30", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2108", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_0.log", + "config_filename": "zookeeper_0.properties" + }, + { + "entity_id": "1", + "clientPort": "2118", + "dataDir": "/tmp/zookeeper_1", + "log_filename": "zookeeper_1.log", + "config_filename": "zookeeper_1.properties" + }, + + { + "entity_id": "2", + "clientPort": "2128", + "dataDir": "/tmp/zookeeper_2", + "log_filename": "zookeeper_2.log", + "config_filename": "zookeeper_2.properties" + }, + { + "entity_id": "3", + "clientPort": "2138", + "dataDir": "/tmp/zookeeper_3", + "log_filename": "zookeeper_3.log", + "config_filename": "zookeeper_3.properties" + }, + + { + "entity_id": "4", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_4_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_4.log", + "config_filename": "kafka_server_4.properties" + }, + { + "entity_id": "5", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_5_logs", + 
"default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_5.log", + "config_filename": "kafka_server_5.properties" + }, + { + "entity_id": "6", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_6_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_6.log", + "config_filename": "kafka_server_6.properties" + }, + { + "entity_id": "7", + "port": "9094", + "broker.id": "4", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_7_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_7.log", + "config_filename": "kafka_server_7.properties" + }, + { + "entity_id": "8", + "port": "9095", + "broker.id": "5", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_8_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_8.log", + "config_filename": "kafka_server_8.properties" + }, + { + "entity_id": "9", + "port": "9096", + "broker.id": "6", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_9_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_9.log", + "config_filename": "kafka_server_9.properties" + }, + + { + "entity_id": "10", + "topic": "test_1", + "threads": "5", + "compression-codec": "2", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "sync":"false", + "producer-num-retries":"5", + "log_filename": "producer_performance_10.log", + "config_filename": "producer_performance_10.properties" + }, + { + "entity_id": "11", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_11.log", + "config_filename": "console_consumer_11.properties" + }, + + { + "entity_id": "12", + "log_filename": "mirror_maker_12.log", + "mirror_consumer_config_filename": "mirror_consumer_12.properties", + "mirror_producer_config_filename": "mirror_producer_12.properties" + }, + { + "entity_id": "13", + "log_filename": "mirror_maker_13.log", + "mirror_consumer_config_filename": "mirror_consumer_13.properties", + "mirror_producer_config_filename": "mirror_producer_13.properties" + } + ] +} diff --git a/system_test/migration_tool_testsuite/testcase_9005/cluster_config.json b/system_test/mirror_maker_testsuite/testcase_15004/cluster_config.json similarity index 71% rename from system_test/migration_tool_testsuite/testcase_9005/cluster_config.json rename to system_test/mirror_maker_testsuite/testcase_15004/cluster_config.json index 9fcb3b0400bd8..f6fe86787f1c3 100644 --- a/system_test/migration_tool_testsuite/testcase_9005/cluster_config.json +++ b/system_test/mirror_maker_testsuite/testcase_15004/cluster_config.json @@ -5,68 +5,67 @@ "hostname": "localhost", "role": "zookeeper", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9900" + "jmx_port": "9100" }, { "entity_id": "1", "hostname": "localhost", "role": "zookeeper", - "cluster_name":"target", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9901" + "jmx_port": "9101" }, - { "entity_id": "2", "hostname": "localhost", - "role": "broker", - "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "role": "zookeeper", + "cluster_name":"target", + "kafka_home": "default", "java_home": "default", - "jmx_port": 
"9902" + "jmx_port": "9102" }, { "entity_id": "3", "hostname": "localhost", - "role": "broker", - "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "role": "zookeeper", + "cluster_name":"target", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9903" + "jmx_port": "9103" }, + { "entity_id": "4", "hostname": "localhost", "role": "broker", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9904" + "jmx_port": "9104" }, - - { "entity_id": "5", "hostname": "localhost", "role": "broker", - "cluster_name":"target", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9905" + "jmx_port": "9105" }, { "entity_id": "6", "hostname": "localhost", "role": "broker", - "cluster_name":"target", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9906" + "jmx_port": "9106" }, + { "entity_id": "7", "hostname": "localhost", @@ -74,38 +73,35 @@ "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9907" + "jmx_port": "9107" }, - - { "entity_id": "8", "hostname": "localhost", - "role": "producer_performance", - "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", - "java_home": "/export/apps/jdk/JDK-1_6_0_27", - "jmx_port": "9908" + "role": "broker", + "cluster_name":"target", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9108" }, { "entity_id": "9", "hostname": "localhost", - "role": "producer_performance", - "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", - "java_home": "/export/apps/jdk/JDK-1_6_0_27", - "jmx_port": "9909" + "role": "broker", + "cluster_name":"target", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9109" }, - { "entity_id": "10", "hostname": "localhost", - "role": "console_consumer", - "cluster_name":"target", + "role": "producer_performance", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9910" + "jmx_port": "9110" }, { "entity_id": "11", @@ -114,28 +110,26 @@ "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9911" + "jmx_port": "9111" }, - { "entity_id": "12", "hostname": "localhost", - "role": "migration_tool", + "role": "mirror_maker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9912" + "jmx_port": "9112" }, { "entity_id": "13", "hostname": "localhost", - "role": "migration_tool", + "role": "mirror_maker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9913" + "jmx_port": "9113" } - ] } diff --git a/system_test/mirror_maker_testsuite/testcase_15004/testcase_15004_properties.json b/system_test/mirror_maker_testsuite/testcase_15004/testcase_15004_properties.json new file mode 100644 index 0000000000000..48864e61afb9c --- /dev/null +++ b/system_test/mirror_maker_testsuite/testcase_15004/testcase_15004_properties.json @@ -0,0 +1,156 @@ +{ + "description": {"01":"Replication with Mirror Maker => Bounce Mirror Maker", + "02":"Set up 2 clusters such as : SOURCE => MirrorMaker => TARGET", + "03":"Set up 2-node Zk cluster for both SOURCE & TARGET", + "04":"Produce and consume messages to a single topic - single partition.", + "05":"This test sends messages to 3 replicas", + "06":"At the end it verifies the log size and contents", + "07":"Use a consumer to 
verify no message loss in TARGET cluster.", + "08":"Producer dimensions : mode:async, acks:1, comp:1", + "09":"Log segment size : 20480" + }, + "testcase_args": { + "bounce_leader": "false", + "bounce_mirror_maker": "true", + "bounced_entity_downtime_sec": "30", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2108", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_0.log", + "config_filename": "zookeeper_0.properties" + }, + { + "entity_id": "1", + "clientPort": "2118", + "dataDir": "/tmp/zookeeper_1", + "log_filename": "zookeeper_1.log", + "config_filename": "zookeeper_1.properties" + }, + + { + "entity_id": "2", + "clientPort": "2128", + "dataDir": "/tmp/zookeeper_2", + "log_filename": "zookeeper_2.log", + "config_filename": "zookeeper_2.properties" + }, + { + "entity_id": "3", + "clientPort": "2138", + "dataDir": "/tmp/zookeeper_3", + "log_filename": "zookeeper_3.log", + "config_filename": "zookeeper_3.properties" + }, + + { + "entity_id": "4", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_4_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_4.log", + "config_filename": "kafka_server_4.properties" + }, + { + "entity_id": "5", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_5_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_5.log", + "config_filename": "kafka_server_5.properties" + }, + { + "entity_id": "6", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_6_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_6.log", + "config_filename": "kafka_server_6.properties" + }, + { + "entity_id": "7", + "port": "9094", + "broker.id": "4", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_7_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_7.log", + "config_filename": "kafka_server_7.properties" + }, + { + "entity_id": "8", + "port": "9095", + "broker.id": "5", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_8_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_8.log", + "config_filename": "kafka_server_8.properties" + }, + { + "entity_id": "9", + "port": "9096", + "broker.id": "6", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_9_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_9.log", + "config_filename": "kafka_server_9.properties" + }, + + { + "entity_id": "10", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "sync":"false", + "producer-num-retries":"5", + "log_filename": "producer_performance_10.log", + "config_filename": "producer_performance_10.properties" + }, + { + "entity_id": "11", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_11.log", + "config_filename": "console_consumer_11.properties" + }, + + { + "entity_id": "12", + "log_filename": "mirror_maker_12.log", + 
"mirror_consumer_config_filename": "mirror_consumer_12.properties", + "mirror_producer_config_filename": "mirror_producer_12.properties" + }, + { + "entity_id": "13", + "log_filename": "mirror_maker_13.log", + "mirror_consumer_config_filename": "mirror_consumer_13.properties", + "mirror_producer_config_filename": "mirror_producer_13.properties" + } + ] +} diff --git a/system_test/migration_tool_testsuite/testcase_9004/cluster_config.json b/system_test/mirror_maker_testsuite/testcase_15005/cluster_config.json similarity index 59% rename from system_test/migration_tool_testsuite/testcase_9004/cluster_config.json rename to system_test/mirror_maker_testsuite/testcase_15005/cluster_config.json index 766a001030f69..63ba37b70e476 100644 --- a/system_test/migration_tool_testsuite/testcase_9004/cluster_config.json +++ b/system_test/mirror_maker_testsuite/testcase_15005/cluster_config.json @@ -5,108 +5,149 @@ "hostname": "localhost", "role": "zookeeper", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9990" + "jmx_port": "9100" }, { "entity_id": "1", "hostname": "localhost", - "role": "broker", + "role": "zookeeper", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9991" + "jmx_port": "9101" }, + { "entity_id": "2", "hostname": "localhost", + "role": "zookeeper", + "cluster_name":"target", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9102" + }, + { + "entity_id": "3", + "hostname": "localhost", + "role": "zookeeper", + "cluster_name":"target", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9103" + }, + + { + "entity_id": "4", + "hostname": "localhost", "role": "broker", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9992" + "jmx_port": "9104" }, { - "entity_id": "3", + "entity_id": "5", "hostname": "localhost", "role": "broker", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9993" + "jmx_port": "9105" }, { - "entity_id": "4", + "entity_id": "6", + "hostname": "localhost", + "role": "broker", + "cluster_name":"source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9106" + }, + + { + "entity_id": "7", "hostname": "localhost", "role": "broker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9994" + "jmx_port": "9107" }, { - "entity_id": "5", + "entity_id": "8", "hostname": "localhost", "role": "broker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9995" + "jmx_port": "9108" }, { - "entity_id": "6", + "entity_id": "9", "hostname": "localhost", "role": "broker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9996" + "jmx_port": "9109" }, + { - "entity_id": "7", + "entity_id": "10", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name":"source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9110" + }, + { + "entity_id": "11", "hostname": "localhost", "role": "producer_performance", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", - "java_home": "/export/apps/jdk/JDK-1_6_0_27", - "jmx_port": "9997" + "kafka_home": "default", + 
"java_home": "default", + "jmx_port": "9111" }, { - "entity_id": "8", + "entity_id": "12", "hostname": "localhost", "role": "console_consumer", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9998" + "jmx_port": "9112" }, { - "entity_id": "9", + "entity_id": "13", "hostname": "localhost", - "role": "zookeeper", + "role": "console_consumer", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9999" + "jmx_port": "9113" }, + { - "entity_id": "10", + "entity_id": "14", "hostname": "localhost", - "role": "migration_tool", + "role": "mirror_maker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9890" + "jmx_port": "9114" }, { - "entity_id": "11", + "entity_id": "15", "hostname": "localhost", - "role": "migration_tool", + "role": "mirror_maker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9891" + "jmx_port": "9115" } ] } diff --git a/system_test/mirror_maker_testsuite/testcase_15005/testcase_15005_properties.json b/system_test/mirror_maker_testsuite/testcase_15005/testcase_15005_properties.json new file mode 100644 index 0000000000000..92b2a6b4ffeb8 --- /dev/null +++ b/system_test/mirror_maker_testsuite/testcase_15005/testcase_15005_properties.json @@ -0,0 +1,178 @@ +{ + "description": {"01":"Replication with Mirror Maker => Bounce Mirror Maker", + "02":"Set up 2 clusters such as : SOURCE => MirrorMaker => TARGET", + "03":"Set up 2-node Zk cluster for both SOURCE & TARGET", + "04":"Produce and consume messages to 2 topics - 2 partitions.", + "05":"This test sends messages to 3 replicas", + "06":"At the end it verifies the log size and contents", + "07":"Use a consumer to verify no message loss in TARGET cluster.", + "08":"Producer dimensions : mode:async, acks:-1, comp:1", + "09":"Log segment size : 20480" + }, + "testcase_args": { + "bounce_leader": "false", + "bounce_mirror_maker": "true", + "bounced_entity_downtime_sec": "30", + "replica_factor": "3", + "num_partition": "2", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2108", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_0.log", + "config_filename": "zookeeper_0.properties" + }, + { + "entity_id": "1", + "clientPort": "2118", + "dataDir": "/tmp/zookeeper_1", + "log_filename": "zookeeper_1.log", + "config_filename": "zookeeper_1.properties" + }, + + { + "entity_id": "2", + "clientPort": "2128", + "dataDir": "/tmp/zookeeper_2", + "log_filename": "zookeeper_2.log", + "config_filename": "zookeeper_2.properties" + }, + { + "entity_id": "3", + "clientPort": "2138", + "dataDir": "/tmp/zookeeper_3", + "log_filename": "zookeeper_3.log", + "config_filename": "zookeeper_3.properties" + }, + + { + "entity_id": "4", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_4_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_4.log", + "config_filename": "kafka_server_4.properties" + }, + { + "entity_id": "5", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_5_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_5.log", + "config_filename": "kafka_server_5.properties" + }, + { + "entity_id": "6", + "port": 
"9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_6_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_6.log", + "config_filename": "kafka_server_6.properties" + }, + { + "entity_id": "7", + "port": "9094", + "broker.id": "4", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_7_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_7.log", + "config_filename": "kafka_server_7.properties" + }, + { + "entity_id": "8", + "port": "9095", + "broker.id": "5", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_8_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_8.log", + "config_filename": "kafka_server_8.properties" + }, + { + "entity_id": "9", + "port": "9096", + "broker.id": "6", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_9_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_9.log", + "config_filename": "kafka_server_9.properties" + }, + + { + "entity_id": "10", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "sync":"false", + "producer-num-retries":"5", + "log_filename": "producer_performance_10.log", + "config_filename": "producer_performance_10.properties" + }, + { + "entity_id": "11", + "topic": "test_2", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "sync":"false", + "producer-num-retries":"5", + "log_filename": "producer_performance_11.log", + "config_filename": "producer_performance_11.properties" + }, + + { + "entity_id": "12", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_12.log", + "config_filename": "console_consumer_12.properties" + }, + { + "entity_id": "13", + "topic": "test_2", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_13.log", + "config_filename": "console_consumer_13.properties" + }, + + { + "entity_id": "14", + "log_filename": "mirror_maker_14.log", + "mirror_consumer_config_filename": "mirror_consumer_14.properties", + "mirror_producer_config_filename": "mirror_producer_14.properties" + }, + { + "entity_id": "15", + "log_filename": "mirror_maker_15.log", + "mirror_consumer_config_filename": "mirror_consumer_15.properties", + "mirror_producer_config_filename": "mirror_producer_15.properties" + } + ] +} diff --git a/system_test/migration_tool_testsuite/testcase_9003/cluster_config.json b/system_test/mirror_maker_testsuite/testcase_15006/cluster_config.json similarity index 59% rename from system_test/migration_tool_testsuite/testcase_9003/cluster_config.json rename to system_test/mirror_maker_testsuite/testcase_15006/cluster_config.json index 766a001030f69..63ba37b70e476 100644 --- a/system_test/migration_tool_testsuite/testcase_9003/cluster_config.json +++ b/system_test/mirror_maker_testsuite/testcase_15006/cluster_config.json @@ -5,108 +5,149 @@ "hostname": "localhost", "role": "zookeeper", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9990" + "jmx_port": "9100" }, { "entity_id": "1", "hostname": "localhost", - "role": "broker", + "role": "zookeeper", "cluster_name":"source", - "kafka_home": 
"system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9991" + "jmx_port": "9101" }, + { "entity_id": "2", "hostname": "localhost", + "role": "zookeeper", + "cluster_name":"target", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9102" + }, + { + "entity_id": "3", + "hostname": "localhost", + "role": "zookeeper", + "cluster_name":"target", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9103" + }, + + { + "entity_id": "4", + "hostname": "localhost", "role": "broker", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9992" + "jmx_port": "9104" }, { - "entity_id": "3", + "entity_id": "5", "hostname": "localhost", "role": "broker", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9993" + "jmx_port": "9105" }, { - "entity_id": "4", + "entity_id": "6", + "hostname": "localhost", + "role": "broker", + "cluster_name":"source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9106" + }, + + { + "entity_id": "7", "hostname": "localhost", "role": "broker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9994" + "jmx_port": "9107" }, { - "entity_id": "5", + "entity_id": "8", "hostname": "localhost", "role": "broker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9995" + "jmx_port": "9108" }, { - "entity_id": "6", + "entity_id": "9", "hostname": "localhost", "role": "broker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9996" + "jmx_port": "9109" }, + { - "entity_id": "7", + "entity_id": "10", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name":"source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9110" + }, + { + "entity_id": "11", "hostname": "localhost", "role": "producer_performance", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", - "java_home": "/export/apps/jdk/JDK-1_6_0_27", - "jmx_port": "9997" + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9111" }, { - "entity_id": "8", + "entity_id": "12", "hostname": "localhost", "role": "console_consumer", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9998" + "jmx_port": "9112" }, { - "entity_id": "9", + "entity_id": "13", "hostname": "localhost", - "role": "zookeeper", + "role": "console_consumer", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9999" + "jmx_port": "9113" }, + { - "entity_id": "10", + "entity_id": "14", "hostname": "localhost", - "role": "migration_tool", + "role": "mirror_maker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9890" + "jmx_port": "9114" }, { - "entity_id": "11", + "entity_id": "15", "hostname": "localhost", - "role": "migration_tool", + "role": "mirror_maker", "cluster_name":"target", "kafka_home": "default", "java_home": "default", - "jmx_port": "9891" + "jmx_port": "9115" } ] } diff --git a/system_test/mirror_maker_testsuite/testcase_15006/testcase_15006_properties.json b/system_test/mirror_maker_testsuite/testcase_15006/testcase_15006_properties.json new file mode 100644 index 0000000000000..7d5019c6aa7fe --- /dev/null +++ 
b/system_test/mirror_maker_testsuite/testcase_15006/testcase_15006_properties.json @@ -0,0 +1,178 @@ +{ + "description": {"01":"Replication with Mirror Maker => Bounce Mirror Maker", + "02":"Set up 2 clusters such as : SOURCE => MirrorMaker => TARGET", + "03":"Set up 2-node Zk cluster for both SOURCE & TARGET", + "04":"Produce and consume messages to 2 topics - 2 partitions.", + "05":"This test sends messages to 3 replicas", + "06":"At the end it verifies the log size and contents", + "07":"Use a consumer to verify no message loss in TARGET cluster.", + "08":"Producer dimensions : mode:async, acks:1, comp:1", + "09":"Log segment size : 20480" + }, + "testcase_args": { + "bounce_leader": "false", + "bounce_mirror_maker": "true", + "bounced_entity_downtime_sec": "30", + "replica_factor": "3", + "num_partition": "2", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2108", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_0.log", + "config_filename": "zookeeper_0.properties" + }, + { + "entity_id": "1", + "clientPort": "2118", + "dataDir": "/tmp/zookeeper_1", + "log_filename": "zookeeper_1.log", + "config_filename": "zookeeper_1.properties" + }, + + { + "entity_id": "2", + "clientPort": "2128", + "dataDir": "/tmp/zookeeper_2", + "log_filename": "zookeeper_2.log", + "config_filename": "zookeeper_2.properties" + }, + { + "entity_id": "3", + "clientPort": "2138", + "dataDir": "/tmp/zookeeper_3", + "log_filename": "zookeeper_3.log", + "config_filename": "zookeeper_3.properties" + }, + + { + "entity_id": "4", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_4_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_4.log", + "config_filename": "kafka_server_4.properties" + }, + { + "entity_id": "5", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_5_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_5.log", + "config_filename": "kafka_server_5.properties" + }, + { + "entity_id": "6", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_6_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_6.log", + "config_filename": "kafka_server_6.properties" + }, + { + "entity_id": "7", + "port": "9094", + "broker.id": "4", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_7_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_7.log", + "config_filename": "kafka_server_7.properties" + }, + { + "entity_id": "8", + "port": "9095", + "broker.id": "5", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_8_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_8.log", + "config_filename": "kafka_server_8.properties" + }, + { + "entity_id": "9", + "port": "9096", + "broker.id": "6", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_9_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_9.log", + "config_filename": "kafka_server_9.properties" + }, + + { + "entity_id": "10", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": 
"500", + "message": "100", + "request-num-acks": "1", + "sync":"false", + "producer-num-retries":"5", + "log_filename": "producer_performance_10.log", + "config_filename": "producer_performance_10.properties" + }, + { + "entity_id": "11", + "topic": "test_2", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "sync":"false", + "producer-num-retries":"5", + "log_filename": "producer_performance_11.log", + "config_filename": "producer_performance_11.properties" + }, + + { + "entity_id": "12", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_12.log", + "config_filename": "console_consumer_12.properties" + }, + { + "entity_id": "13", + "topic": "test_2", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_13.log", + "config_filename": "console_consumer_13.properties" + }, + + { + "entity_id": "14", + "log_filename": "mirror_maker_14.log", + "mirror_consumer_config_filename": "mirror_consumer_14.properties", + "mirror_producer_config_filename": "mirror_producer_14.properties" + }, + { + "entity_id": "15", + "log_filename": "mirror_maker_15.log", + "mirror_consumer_config_filename": "mirror_consumer_15.properties", + "mirror_producer_config_filename": "mirror_producer_15.properties" + } + ] +} diff --git a/system_test/mirror_maker_testsuite/testcase_5001/testcase_5001_properties.json b/system_test/mirror_maker_testsuite/testcase_5001/testcase_5001_properties.json index 287cab9a9b6b9..0891836421909 100644 --- a/system_test/mirror_maker_testsuite/testcase_5001/testcase_5001_properties.json +++ b/system_test/mirror_maker_testsuite/testcase_5001/testcase_5001_properties.json @@ -7,7 +7,7 @@ "06":"At the end it verifies the log size and contents", "07":"Use a consumer to verify no message loss in TARGET cluster.", "08":"Producer dimensions : mode:sync, acks:-1, comp:0", - "09":"Log segment size : 10240" + "09":"Log segment size : 20480" }, "testcase_args": { "bounce_leader": "false", @@ -54,7 +54,7 @@ "entity_id": "4", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_4_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -65,7 +65,7 @@ "entity_id": "5", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_5_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -76,7 +76,7 @@ "entity_id": "6", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_6_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -87,7 +87,7 @@ "entity_id": "7", "port": "9094", "broker.id": "4", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_7_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -98,7 +98,7 @@ "entity_id": "8", "port": "9095", "broker.id": "5", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_8_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -109,7 +109,7 @@ "entity_id": "9", "port": "9096", "broker.id": "6", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_9_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -119,6 +119,7 @@ { "entity_id": "10", + "new-producer":"true", 
"topic": "test_1", "threads": "5", "compression-codec": "0", @@ -141,6 +142,7 @@ { "entity_id": "12", + "new-producer":"true", "log_filename": "mirror_maker_12.log", "mirror_consumer_config_filename": "mirror_consumer_12.properties", "mirror_producer_config_filename": "mirror_producer_12.properties" diff --git a/system_test/mirror_maker_testsuite/testcase_5002/testcase_5002_properties.json b/system_test/mirror_maker_testsuite/testcase_5002/testcase_5002_properties.json index 5457eb1d26526..56e481255cbbb 100644 --- a/system_test/mirror_maker_testsuite/testcase_5002/testcase_5002_properties.json +++ b/system_test/mirror_maker_testsuite/testcase_5002/testcase_5002_properties.json @@ -7,7 +7,7 @@ "06":"At the end it verifies the log size and contents", "07":"Use a consumer to verify no message loss in TARGET cluster.", "08":"Producer dimensions : mode:sync, acks:-1, comp:0", - "09":"Log segment size : 10240" + "09":"Log segment size : 20480" }, "testcase_args": { "bounce_leader": "false", @@ -54,7 +54,7 @@ "entity_id": "4", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_4_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -65,7 +65,7 @@ "entity_id": "5", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_5_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -76,7 +76,7 @@ "entity_id": "6", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_6_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -87,7 +87,7 @@ "entity_id": "7", "port": "9094", "broker.id": "4", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_7_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -98,7 +98,7 @@ "entity_id": "8", "port": "9095", "broker.id": "5", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_8_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -109,7 +109,7 @@ "entity_id": "9", "port": "9096", "broker.id": "6", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_9_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -119,6 +119,7 @@ { "entity_id": "10", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -141,6 +142,7 @@ { "entity_id": "12", + "new-producer":"true", "log_filename": "mirror_maker_12.log", "mirror_consumer_config_filename": "mirror_consumer_12.properties", "mirror_producer_config_filename": "mirror_producer_12.properties" diff --git a/system_test/mirror_maker_testsuite/testcase_5003/testcase_5003_properties.json b/system_test/mirror_maker_testsuite/testcase_5003/testcase_5003_properties.json index 98fefee5838a2..8f8c47af4e797 100644 --- a/system_test/mirror_maker_testsuite/testcase_5003/testcase_5003_properties.json +++ b/system_test/mirror_maker_testsuite/testcase_5003/testcase_5003_properties.json @@ -7,7 +7,7 @@ "06":"At the end it verifies the log size and contents", "07":"Use a consumer to verify no message loss in TARGET cluster.", "08":"Producer dimensions : mode:async, acks:-1, comp:1", - "09":"Log segment size : 10240" + "09":"Log segment size : 20480" }, "testcase_args": { "bounce_leader": "false", @@ -55,7 +55,7 @@ "entity_id": "4", "port": "9091", "broker.id": "1", - "log.segment.bytes": 
"10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_4_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -66,7 +66,7 @@ "entity_id": "5", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_5_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -77,7 +77,7 @@ "entity_id": "6", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_6_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -88,7 +88,7 @@ "entity_id": "7", "port": "9094", "broker.id": "4", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_7_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -99,7 +99,7 @@ "entity_id": "8", "port": "9095", "broker.id": "5", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_8_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -110,7 +110,7 @@ "entity_id": "9", "port": "9096", "broker.id": "6", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_9_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -120,6 +120,7 @@ { "entity_id": "10", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "2", @@ -142,12 +143,14 @@ { "entity_id": "12", + "new-producer":"true", "log_filename": "mirror_maker_12.log", "mirror_consumer_config_filename": "mirror_consumer_12.properties", "mirror_producer_config_filename": "mirror_producer_12.properties" }, { "entity_id": "13", + "new-producer":"true", "log_filename": "mirror_maker_13.log", "mirror_consumer_config_filename": "mirror_consumer_13.properties", "mirror_producer_config_filename": "mirror_producer_13.properties" diff --git a/system_test/mirror_maker_testsuite/testcase_5004/testcase_5004_properties.json b/system_test/mirror_maker_testsuite/testcase_5004/testcase_5004_properties.json index 6067b1263d0a5..baa639b688888 100644 --- a/system_test/mirror_maker_testsuite/testcase_5004/testcase_5004_properties.json +++ b/system_test/mirror_maker_testsuite/testcase_5004/testcase_5004_properties.json @@ -7,7 +7,7 @@ "06":"At the end it verifies the log size and contents", "07":"Use a consumer to verify no message loss in TARGET cluster.", "08":"Producer dimensions : mode:async, acks:1, comp:1", - "09":"Log segment size : 10240" + "09":"Log segment size : 20480" }, "testcase_args": { "bounce_leader": "false", @@ -55,7 +55,7 @@ "entity_id": "4", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_4_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -66,7 +66,7 @@ "entity_id": "5", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_5_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -77,7 +77,7 @@ "entity_id": "6", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_6_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -88,7 +88,7 @@ "entity_id": "7", "port": "9094", "broker.id": "4", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_7_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -99,7 +99,7 @@ "entity_id": "8", "port": 
"9095", "broker.id": "5", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_8_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -110,7 +110,7 @@ "entity_id": "9", "port": "9096", "broker.id": "6", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_9_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -120,6 +120,7 @@ { "entity_id": "10", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -142,12 +143,14 @@ { "entity_id": "12", + "new-producer":"true", "log_filename": "mirror_maker_12.log", "mirror_consumer_config_filename": "mirror_consumer_12.properties", "mirror_producer_config_filename": "mirror_producer_12.properties" }, { "entity_id": "13", + "new-producer":"true", "log_filename": "mirror_maker_13.log", "mirror_consumer_config_filename": "mirror_consumer_13.properties", "mirror_producer_config_filename": "mirror_producer_13.properties" diff --git a/system_test/mirror_maker_testsuite/testcase_5005/testcase_5005_properties.json b/system_test/mirror_maker_testsuite/testcase_5005/testcase_5005_properties.json index 58483ad375cd1..8c383756d88ca 100644 --- a/system_test/mirror_maker_testsuite/testcase_5005/testcase_5005_properties.json +++ b/system_test/mirror_maker_testsuite/testcase_5005/testcase_5005_properties.json @@ -7,7 +7,7 @@ "06":"At the end it verifies the log size and contents", "07":"Use a consumer to verify no message loss in TARGET cluster.", "08":"Producer dimensions : mode:async, acks:-1, comp:1", - "09":"Log segment size : 10240" + "09":"Log segment size : 20480" }, "testcase_args": { "bounce_leader": "false", @@ -55,7 +55,7 @@ "entity_id": "4", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_4_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -66,7 +66,7 @@ "entity_id": "5", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_5_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -77,7 +77,7 @@ "entity_id": "6", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_6_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -88,7 +88,7 @@ "entity_id": "7", "port": "9094", "broker.id": "4", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_7_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -99,7 +99,7 @@ "entity_id": "8", "port": "9095", "broker.id": "5", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_8_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -110,7 +110,7 @@ "entity_id": "9", "port": "9096", "broker.id": "6", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_9_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -120,6 +120,7 @@ { "entity_id": "10", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -133,6 +134,7 @@ }, { "entity_id": "11", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", @@ -164,12 +166,14 @@ { "entity_id": "14", + "new-producer":"true", "log_filename": "mirror_maker_14.log", "mirror_consumer_config_filename": "mirror_consumer_14.properties", 
"mirror_producer_config_filename": "mirror_producer_14.properties" }, { "entity_id": "15", + "new-producer":"true", "log_filename": "mirror_maker_15.log", "mirror_consumer_config_filename": "mirror_consumer_15.properties", "mirror_producer_config_filename": "mirror_producer_15.properties" diff --git a/system_test/mirror_maker_testsuite/testcase_5006/testcase_5006_properties.json b/system_test/mirror_maker_testsuite/testcase_5006/testcase_5006_properties.json index 1d9190ce7c1b5..fb275330bd188 100644 --- a/system_test/mirror_maker_testsuite/testcase_5006/testcase_5006_properties.json +++ b/system_test/mirror_maker_testsuite/testcase_5006/testcase_5006_properties.json @@ -7,7 +7,7 @@ "06":"At the end it verifies the log size and contents", "07":"Use a consumer to verify no message loss in TARGET cluster.", "08":"Producer dimensions : mode:async, acks:1, comp:1", - "09":"Log segment size : 10240" + "09":"Log segment size : 20480" }, "testcase_args": { "bounce_leader": "false", @@ -55,7 +55,7 @@ "entity_id": "4", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_4_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -66,7 +66,7 @@ "entity_id": "5", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_5_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -77,7 +77,7 @@ "entity_id": "6", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_6_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -88,7 +88,7 @@ "entity_id": "7", "port": "9094", "broker.id": "4", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_7_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -99,7 +99,7 @@ "entity_id": "8", "port": "9095", "broker.id": "5", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_8_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -110,7 +110,7 @@ "entity_id": "9", "port": "9096", "broker.id": "6", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_9_logs", "default.replication.factor": "3", "num.partitions": "5", @@ -120,6 +120,7 @@ { "entity_id": "10", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -133,6 +134,7 @@ }, { "entity_id": "11", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", @@ -164,12 +166,14 @@ { "entity_id": "14", + "new-producer":"true", "log_filename": "mirror_maker_14.log", "mirror_consumer_config_filename": "mirror_consumer_14.properties", "mirror_producer_config_filename": "mirror_producer_14.properties" }, { "entity_id": "15", + "new-producer":"true", "log_filename": "mirror_maker_15.log", "mirror_consumer_config_filename": "mirror_consumer_15.properties", "mirror_producer_config_filename": "mirror_producer_15.properties" diff --git a/system_test/migration_tool_testsuite/cluster_config.json b/system_test/offset_management_testsuite/cluster_config.json similarity index 64% rename from system_test/migration_tool_testsuite/cluster_config.json rename to system_test/offset_management_testsuite/cluster_config.json index 8353e56e55aab..dcca2007de4bd 100644 --- a/system_test/migration_tool_testsuite/cluster_config.json +++ 
b/system_test/offset_management_testsuite/cluster_config.json @@ -5,99 +5,99 @@ "hostname": "localhost", "role": "zookeeper", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9990" + "jmx_port": "9100" }, { "entity_id": "1", "hostname": "localhost", "role": "broker", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9991" + "jmx_port": "9101" }, { "entity_id": "2", "hostname": "localhost", "role": "broker", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9992" + "jmx_port": "9102" }, { "entity_id": "3", "hostname": "localhost", "role": "broker", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", + "kafka_home": "default", "java_home": "default", - "jmx_port": "9993" + "jmx_port": "9103" }, { "entity_id": "4", "hostname": "localhost", "role": "broker", - "cluster_name":"target", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9994" + "jmx_port": "9104" }, { "entity_id": "5", "hostname": "localhost", - "role": "broker", - "cluster_name":"target", + "role": "producer_performance", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9995" + "jmx_port": "9105" }, { "entity_id": "6", "hostname": "localhost", - "role": "broker", - "cluster_name":"target", + "role": "console_consumer", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9996" + "jmx_port": "9106" }, { "entity_id": "7", "hostname": "localhost", - "role": "producer_performance", + "role": "console_consumer", "cluster_name":"source", - "kafka_home": "system_test/migration_tool_testsuite/0.7", - "java_home": "/export/apps/jdk/JDK-1_6_0_27", - "jmx_port": "9997" + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9107" }, { "entity_id": "8", "hostname": "localhost", "role": "console_consumer", - "cluster_name":"target", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9998" + "jmx_port": "9108" }, { "entity_id": "9", "hostname": "localhost", - "role": "zookeeper", - "cluster_name":"target", + "role": "console_consumer", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9999" + "jmx_port": "9109" }, { "entity_id": "10", "hostname": "localhost", - "role": "migration_tool", - "cluster_name":"target", + "role": "console_consumer", + "cluster_name":"source", "kafka_home": "default", "java_home": "default", - "jmx_port": "9890" + "jmx_port": "9110" } ] } diff --git a/system_test/offset_management_testsuite/config/console_consumer.properties b/system_test/offset_management_testsuite/config/console_consumer.properties new file mode 100644 index 0000000000000..a2ab8b9c155e2 --- /dev/null +++ b/system_test/offset_management_testsuite/config/console_consumer.properties @@ -0,0 +1,2 @@ +auto.offset.reset=smallest +auto.commit.interval.ms=1000 diff --git a/system_test/offset_management_testsuite/config/producer_performance.properties b/system_test/offset_management_testsuite/config/producer_performance.properties new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/system_test/migration_tool_testsuite/config/server.properties 
b/system_test/offset_management_testsuite/config/server.properties similarity index 88% rename from system_test/migration_tool_testsuite/config/server.properties rename to system_test/offset_management_testsuite/config/server.properties index 54144a28a0800..2b988f86052a7 100644 --- a/system_test/migration_tool_testsuite/config/server.properties +++ b/system_test/offset_management_testsuite/config/server.properties @@ -95,11 +95,11 @@ log.retention.hours=168 # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining # segments don't drop below log.retention.bytes. #log.retention.bytes=1073741824 +log.retention.bytes=-1 # The maximum size of a log segment file. When this size is reached a new log segment will be created. -#log.segment.bytes=536870912 -#log.segment.bytes=102400 -log.segment.bytes=128 +#log.segment.size=536870912 +log.segment.bytes=102400 # The interval at which log segments are checked to see if they can be deleted according # to the retention policies @@ -115,11 +115,30 @@ enable.zookeeper=true # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". # You can also append an optional chroot string to the urls to specify the # root directory for all kafka znodes. -zk.connect=localhost:2181 zookeeper.connect=localhost:2181 # Timeout in ms for connecting to zookeeper -zk.connection.timeout.ms=1000000 zookeeper.connection.timeout.ms=1000000 monitoring.period.secs=1 +message.max.bytes=1000000 +queued.max.requests=500 +log.roll.hours=168 +log.index.size.max.bytes=10485760 +log.index.interval.bytes=4096 +auto.create.topics.enable=true +controller.socket.timeout.ms=30000 +controller.message.queue.size=10 +default.replication.factor=1 +replica.lag.time.max.ms=10000 +replica.lag.max.messages=4000 +replica.socket.timeout.ms=30000 +replica.socket.receive.buffer.bytes=65536 +replica.fetch.max.bytes=1048576 +replica.fetch.wait.max.ms=500 +replica.fetch.min.bytes=4096 +num.replica.fetchers=1 + +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=4 + diff --git a/system_test/migration_tool_testsuite/config/zookeeper.properties b/system_test/offset_management_testsuite/config/zookeeper.properties similarity index 96% rename from system_test/migration_tool_testsuite/config/zookeeper.properties rename to system_test/offset_management_testsuite/config/zookeeper.properties index 74cbf90428f81..5474a72be1a2c 100644 --- a/system_test/migration_tool_testsuite/config/zookeeper.properties +++ b/system_test/offset_management_testsuite/config/zookeeper.properties @@ -18,3 +18,6 @@ dataDir=/tmp/zookeeper clientPort=2181 # disable the per-ip limit on the number of connections since this is a non-production config maxClientCnxns=0 +syncLimit=5 +initLimit=10 +tickTime=2000 diff --git a/system_test/migration_tool_testsuite/migration_tool_test.py b/system_test/offset_management_testsuite/offset_management_test.py similarity index 78% rename from system_test/migration_tool_testsuite/migration_tool_test.py rename to system_test/offset_management_testsuite/offset_management_test.py index ce6f4f684beaa..aa389105aa427 100644 --- a/system_test/migration_tool_testsuite/migration_tool_test.py +++ b/system_test/offset_management_testsuite/offset_management_test.py @@ -17,14 +17,11 @@ #!/usr/bin/env python # =================================== -# migration_tool_test.py +# offset_management_test.py # =================================== -import inspect -import logging import os import signal -import subprocess import sys import time import traceback @@ -41,7 
+38,7 @@ import kafka_system_test_utils import metrics -class MigrationToolTest(ReplicationUtils, SetupUtils): +class OffsetManagementTest(ReplicationUtils, SetupUtils): testModuleAbsPathName = os.path.realpath(__file__) testSuiteAbsPathName = os.path.abspath(os.path.dirname(testModuleAbsPathName)) @@ -54,7 +51,7 @@ def __init__(self, systemTestEnv): # "clusterEntityConfigDictList" self.systemTestEnv = systemTestEnv - super(MigrationToolTest, self).__init__(self) + super(OffsetManagementTest, self).__init__(self) # dict to pass user-defined attributes to logger argument: "extra" d = {'name_of_class': self.__class__.__name__} @@ -118,32 +115,31 @@ def runTest(self): # ============================================================================== # # initialize self.testcaseEnv with user-defined environment variables (product specific) - self.testcaseEnv.userDefinedEnvVarDict["zkConnectStr"] = "" self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = False self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = False # initialize signal handler signal.signal(signal.SIGINT, self.signal_handler) - + # TestcaseEnv.testcaseConfigsList initialized by reading testcase properties file: # system_test/_testsuite/testcase_/testcase__properties.json self.testcaseEnv.testcaseConfigsList = system_test_utils.get_json_list_data( self.testcaseEnv.testcasePropJsonPathName) - + # clean up data directories specified in zookeeper.properties and kafka_server_.properties kafka_system_test_utils.cleanup_data_at_remote_hosts(self.systemTestEnv, self.testcaseEnv) # create "LOCAL" log directories for metrics, dashboards for each entity under this testcase # for collecting logs from remote machines kafka_system_test_utils.generate_testcase_log_dirs(self.systemTestEnv, self.testcaseEnv) - + # TestcaseEnv - initialize producer & consumer config / log file pathnames kafka_system_test_utils.init_entity_props(self.systemTestEnv, self.testcaseEnv) # generate remote hosts log/config dirs if not exist kafka_system_test_utils.generate_testcase_log_dirs_in_remote_hosts(self.systemTestEnv, self.testcaseEnv) - # generate properties files for zookeeper, kafka, producer, consumer and mirror-maker: + # generate properties files for zookeeper, kafka, producer, and consumer: # 1. copy system_test/_testsuite/config/*.properties to # system_test/_testsuite/testcase_/config/ # 2. 
update all properties files in system_test/_testsuite/testcase_/config @@ -151,7 +147,7 @@ def runTest(self): # system_test/_testsuite/testcase_/testcase__properties.json kafka_system_test_utils.generate_overriden_props_files(self.testSuiteAbsPathName, self.testcaseEnv, self.systemTestEnv) - + # ============================================= # preparing all entities to start the test # ============================================= @@ -159,19 +155,14 @@ def runTest(self): kafka_system_test_utils.start_zookeepers(self.systemTestEnv, self.testcaseEnv) self.anonLogger.info("sleeping for 2s") time.sleep(2) - + self.log_message("starting brokers") kafka_system_test_utils.start_brokers(self.systemTestEnv, self.testcaseEnv) self.anonLogger.info("sleeping for 5s") time.sleep(5) - self.log_message("starting migration tool") - kafka_system_test_utils.start_migration_tool(self.systemTestEnv, self.testcaseEnv) - self.anonLogger.info("sleeping for 5s") - time.sleep(5) - - self.log_message("creating topics") - kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv) + self.log_message("creating offset topic") + kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv, "__consumer_offsets", 3, 2) self.anonLogger.info("sleeping for 5s") time.sleep(5) @@ -179,49 +170,65 @@ def runTest(self): # starting producer # ============================================= self.log_message("starting producer in the background") - kafka_system_test_utils.start_producer_performance(self.systemTestEnv, self.testcaseEnv, True) + kafka_system_test_utils.start_producer_performance(self.systemTestEnv, self.testcaseEnv, False) msgProducingFreeTimeSec = self.testcaseEnv.testcaseArgumentsDict["message_producing_free_time_sec"] self.anonLogger.info("sleeping for " + msgProducingFreeTimeSec + " sec to produce some messages") time.sleep(int(msgProducingFreeTimeSec)) + kafka_system_test_utils.start_console_consumers(self.systemTestEnv, self.testcaseEnv) + + kafka_system_test_utils.get_leader_for(self.systemTestEnv, self.testcaseEnv, "__consumer_offsets", 0) + # ============================================= - # A while-loop to bounce leader as specified + # A while-loop to bounce consumers as specified # by "num_iterations" in testcase_n_properties.json # ============================================= i = 1 numIterations = int(self.testcaseEnv.testcaseArgumentsDict["num_iteration"]) - bouncedEntityDownTimeSec = 1 + bouncedEntityDownTimeSec = 10 try: bouncedEntityDownTimeSec = int(self.testcaseEnv.testcaseArgumentsDict["bounced_entity_downtime_sec"]) except: pass + # group1 -> offsets partition 0 // has one consumer; eid: 6 + # group2 -> offsets partition 1 // has four consumers; eid: 7, 8, 9, 10 + + offsets_0_leader_entity = kafka_system_test_utils.get_leader_for(self.systemTestEnv, self.testcaseEnv, "__consumer_offsets", 0) + offsets_1_leader_entity = kafka_system_test_utils.get_leader_for(self.systemTestEnv, self.testcaseEnv, "__consumer_offsets", 1) + while i <= numIterations: self.log_message("Iteration " + str(i) + " of " + str(numIterations)) + kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, offsets_0_leader_entity, self.testcaseEnv.entityBrokerParentPidDict[offsets_0_leader_entity]) + kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, offsets_1_leader_entity, self.testcaseEnv.entityBrokerParentPidDict[offsets_1_leader_entity]) # ============================================= - # Bounce Migration Tool + # Bounce consumers if specified in testcase config # 
============================================= - bounceMigrationTool = self.testcaseEnv.testcaseArgumentsDict["bounce_migration_tool"] - self.log_message("bounce_migration_tool flag : " + bounceMigrationTool) - if (bounceMigrationTool.lower() == "true"): + bounceConsumers = self.testcaseEnv.testcaseArgumentsDict["bounce_consumers"] + self.log_message("bounce_consumers flag : " + bounceConsumers) + if (bounceConsumers.lower() == "true"): - clusterConfigList = self.systemTestEnv.clusterEntityConfigDictList - migrationToolEntityIdList = system_test_utils.get_data_from_list_of_dicts( - clusterConfigList, "role", "migration_tool", "entity_id") + clusterConfigList = self.systemTestEnv.clusterEntityConfigDictList + consumerEntityIdList = system_test_utils.get_data_from_list_of_dicts( clusterConfigList, "role", "console_consumer", "entity_id") - stoppedMigrationToolEntityId = migrationToolEntityIdList[0] - migrationToolPPid = self.testcaseEnv.entityMigrationToolParentPidDict[stoppedMigrationToolEntityId] + for stoppedConsumerEntityId in consumerEntityIdList: + consumerPPID = self.testcaseEnv.entityConsoleConsumerParentPidDict[stoppedConsumerEntityId] + self.log_message("stopping consumer: " + consumerPPID) + kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, stoppedConsumerEntityId, consumerPPID) - self.log_message("stopping migration tool : " + migrationToolPPid) - kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, stoppedMigrationToolEntityId, migrationToolPPid) self.anonLogger.info("sleeping for " + str(bouncedEntityDownTimeSec) + " sec") time.sleep(bouncedEntityDownTimeSec) + # leaders would have changed during the above bounce. + self.log_message("starting the previously terminated consumers.") + for stoppedConsumerEntityId in consumerEntityIdList: + # starting previously terminated consumer + kafka_system_test_utils.start_console_consumers(self.systemTestEnv, self.testcaseEnv, stoppedConsumerEntityId) - # starting previously terminated broker - self.log_message("starting the previously terminated migration tool") - kafka_system_test_utils.start_migration_tool(self.systemTestEnv, self.testcaseEnv, stoppedMigrationToolEntityId) + self.log_message("starting the previously terminated brokers") + kafka_system_test_utils.start_entity_in_background(self.systemTestEnv, self.testcaseEnv, offsets_0_leader_entity) + kafka_system_test_utils.start_entity_in_background(self.systemTestEnv, self.testcaseEnv, offsets_1_leader_entity) self.anonLogger.info("sleeping for 15s") time.sleep(15) @@ -253,17 +260,9 @@ def runTest(self): self.testcaseEnv.lock.release() time.sleep(2) - #print "\n\n#### sleeping for 30 min ...\n\n" - #time.sleep(1800) - - # ============================================= - # starting consumer - # ============================================= - self.log_message("starting consumer in the background") - kafka_system_test_utils.start_console_consumer(self.systemTestEnv, self.testcaseEnv) - self.anonLogger.info("sleeping for 20s") - time.sleep(20) - + self.anonLogger.info("sleeping for 15s") + time.sleep(15) + # ============================================= # this testcase is completed - stop all entities # ============================================= @@ -281,29 +280,17 @@ def runTest(self): # collect logs from remote hosts # ============================================= kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv) - + # ============================================= # validate the data matched and checksum # 
============================================= self.log_message("validating data matched") - kafka_system_test_utils.validate_data_matched(self.systemTestEnv, self.testcaseEnv, replicationUtils) - kafka_system_test_utils.validate_broker_log_segment_checksum(self.systemTestEnv, self.testcaseEnv) + kafka_system_test_utils.validate_data_matched_in_multi_topics_from_single_consumer_producer(self.systemTestEnv, self.testcaseEnv, replicationUtils) - # ============================================= - # draw graphs - # ============================================= - metrics.draw_all_graphs(self.systemTestEnv.METRICS_PATHNAME, - self.testcaseEnv, - self.systemTestEnv.clusterEntityConfigDictList) - - # build dashboard, one for each role - metrics.build_all_dashboards(self.systemTestEnv.METRICS_PATHNAME, - self.testcaseEnv.testCaseDashboardsDir, - self.systemTestEnv.clusterEntityConfigDictList) - except Exception as e: self.log_message("Exception while running test {0}".format(e)) traceback.print_exc() + self.testcaseEnv.validationStatusDict["Test completed"] = "FAILED" finally: if not skipThisTestCase and not self.systemTestEnv.printTestDescriptionsOnly: diff --git a/system_test/offset_management_testsuite/testcase_7001/testcase_7001_properties.json b/system_test/offset_management_testsuite/testcase_7001/testcase_7001_properties.json new file mode 100644 index 0000000000000..1f0b7180a9d0a --- /dev/null +++ b/system_test/offset_management_testsuite/testcase_7001/testcase_7001_properties.json @@ -0,0 +1,95 @@ +{ + "description": {"01":"To Test : 'Basic offset management test.'", + "02":"Set up a Zk and Kafka cluster.", + "03":"Produce messages to a multiple topics - various partition counts.", + "04":"Start multiple consumer groups to read various subsets of above topics.", + "05":"Bounce consumers.", + "06":"Verify that there are no duplicate messages or lost messages on any consumer group.", + "07":"Producer dimensions : mode:sync, acks:-1, comp:0" + }, + "testcase_args": { + "bounce_leaders": "false", + "bounce_consumers": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50", + "num_topics_for_auto_generated_string":"1" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2108", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_0.log", + "config_filename": "zookeeper_0.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_1.log", + "config_filename": "kafka_server_1.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_2.log", + "config_filename": "kafka_server_2.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_3.log", + "config_filename": "kafka_server_3.properties" + }, + { + "entity_id": "4", + "port": "9094", + "broker.id": "4", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_4_logs", + "default.replication.factor": "3", + 
"num.partitions": "5", + "log_filename": "kafka_server_4.log", + "config_filename": "kafka_server_4.properties" + }, + { + "entity_id": "5", + "topic": "test", + "threads": "3", + "compression-codec": "0", + "message-size": "500", + "message": "1000", + "request-num-acks": "-1", + "sync":"true", + "producer-num-retries":"5", + "log_filename": "producer_performance_10.log", + "config_filename": "producer_performance_10.properties" + }, + { + "entity_id": "6", + "topic": "test_0001", + "group.id": "group1", + "consumer-timeout-ms": "30000", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer_6.properties" + } + ] +} diff --git a/system_test/offset_management_testsuite/testcase_7002/config/kafka_server_1.properties b/system_test/offset_management_testsuite/testcase_7002/config/kafka_server_1.properties new file mode 100644 index 0000000000000..41ec6e49272f1 --- /dev/null +++ b/system_test/offset_management_testsuite/testcase_7002/config/kafka_server_1.properties @@ -0,0 +1,148 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. +broker.id=1 + +# Hostname the broker will advertise to consumers. If not set, kafka will use the value returned +# from InetAddress.getLocalHost(). If there are multiple interfaces getLocalHost +# may not be what you want. +#host.name= + + +############################# Socket Server Settings ############################# + +# The port the socket server listens on +port=9091 + +# The number of threads handling network requests +num.network.threads=2 + +# The number of threads doing disk I/O +num.io.threads=2 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=1048576 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=1048576 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# The directory under which to store log files +log.dir=/tmp/kafka_server_1_logs + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions=5 + +# Overrides for for the default given by num.partitions on a per-topic basis +#topic.partition.count.map=topic1:3, topic2:4 + +############################# Log Flush Policy ############################# + +# The following configurations control the flush of data to disk. 
This is the most +# important performance knob in kafka. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data is at greater risk of loss in the event of a crash. +# 2. Latency: Data is not made available to consumers until it is flushed (which adds latency). +# 3. Throughput: The flush is generally the most expensive operation. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +log.flush.interval.ms=1000 + +# Per-topic overrides for log.flush.interval.ms +#log.flush.intervals.ms.per.topic=topic1:1000, topic2:3000 + +# The interval (in ms) at which logs are checked to see if they need to be flushed to disk. +log.flush.scheduler.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. + +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 +log.retention.bytes=-1 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +#log.segment.size=536870912 +log.segment.bytes=10240 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.cleanup.interval.mins=1 + +############################# Zookeeper ############################# + +# Enable connecting to zookeeper +enable.zookeeper=true + +# Zk connection string (see zk docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. 
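These broker configs set offsets.topic.num.partitions=2, which is what the "group1 -> offsets partition 0 / group2 -> offsets partition 1" comments in offset_management_test.py rely on: at the time of this change the broker places a group's committed offsets on partition abs(groupId.hashCode) % offsets.topic.num.partitions. A minimal Python illustration of that mapping; the Java-style hashCode below is re-implemented only to make the example self-contained and is not part of the test suite:

    def java_string_hashcode(s):
        """Re-implementation of java.lang.String.hashCode, for illustration only."""
        h = 0
        for ch in s:
            h = (31 * h + ord(ch)) & 0xFFFFFFFF
        return h - 0x100000000 if h > 0x7FFFFFFF else h

    def offsets_partition_for(group_id, num_offsets_partitions=2):
        # Mirrors the broker-side rule: abs(groupId.hashCode) % offsets.topic.num.partitions
        return abs(java_string_hashcode(group_id)) % num_offsets_partitions

    if __name__ == "__main__":
        for group in ("group1", "group2"):
            print(group, "->", "__consumer_offsets partition", offsets_partition_for(group))

Running this prints partition 0 for group1 and partition 1 for group2, matching the comments in the test driver.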
+zookeeper.connect=localhost:2108 + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=1000000 + +monitoring.period.secs=1 +message.max.bytes=1000000 +queued.max.requests=500 +log.roll.hours=168 +log.index.size.max.bytes=10485760 +log.index.interval.bytes=4096 +auto.create.topics.enable=true +controller.socket.timeout.ms=30000 +controller.message.queue.size=10 +default.replication.factor=3 +replica.lag.time.max.ms=10000 +replica.lag.max.messages=4000 +replica.socket.timeout.ms=30000 +replica.socket.receive.buffer.bytes=65536 +replica.fetch.max.bytes=1048576 +replica.fetch.wait.max.ms=500 +replica.fetch.min.bytes=4096 +num.replica.fetchers=1 + +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=4 + +kafka.csv.metrics.dir=/home/jkoshy/Projects/kafka/system_test/offset_management_testsuite/testcase_7002/logs/broker-1/metrics +kafka.csv.metrics.reporter.enabled=true +kafka.metrics.polling.interval.secs=5 +kafka.metrics.reporters=kafka.metrics.KafkaCSVMetricsReporter diff --git a/system_test/offset_management_testsuite/testcase_7002/config/kafka_server_2.properties b/system_test/offset_management_testsuite/testcase_7002/config/kafka_server_2.properties new file mode 100644 index 0000000000000..727e23701d6c2 --- /dev/null +++ b/system_test/offset_management_testsuite/testcase_7002/config/kafka_server_2.properties @@ -0,0 +1,148 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. +broker.id=2 + +# Hostname the broker will advertise to consumers. If not set, kafka will use the value returned +# from InetAddress.getLocalHost(). If there are multiple interfaces getLocalHost +# may not be what you want. +#host.name= + + +############################# Socket Server Settings ############################# + +# The port the socket server listens on +port=9092 + +# The number of threads handling network requests +num.network.threads=2 + +# The number of threads doing disk I/O +num.io.threads=2 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=1048576 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=1048576 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# The directory under which to store log files +log.dir=/tmp/kafka_server_2_logs + +# The default number of log partitions per topic. 
More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions=5 + +# Overrides for for the default given by num.partitions on a per-topic basis +#topic.partition.count.map=topic1:3, topic2:4 + +############################# Log Flush Policy ############################# + +# The following configurations control the flush of data to disk. This is the most +# important performance knob in kafka. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data is at greater risk of loss in the event of a crash. +# 2. Latency: Data is not made available to consumers until it is flushed (which adds latency). +# 3. Throughput: The flush is generally the most expensive operation. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +log.flush.interval.ms=1000 + +# Per-topic overrides for log.flush.interval.ms +#log.flush.intervals.ms.per.topic=topic1:1000, topic2:3000 + +# The interval (in ms) at which logs are checked to see if they need to be flushed to disk. +log.flush.scheduler.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. + +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 +log.retention.bytes=-1 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +#log.segment.size=536870912 +log.segment.bytes=10240 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.cleanup.interval.mins=1 + +############################# Zookeeper ############################# + +# Enable connecting to zookeeper +enable.zookeeper=true + +# Zk connection string (see zk docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. 
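The per-broker kafka_server_N.properties files added for testcase_7002 read like the output of generate_overriden_props_files: the shared config/server.properties template with the entity-specific keys from the testcase JSON (broker.id, port, log.dir, log.segment.bytes, ...) layered on top. A rough sketch of that overlay step; the helper names and the key filtering are assumptions for illustration, not the actual kafka_system_test_utils code:

    import json

    def load_properties(path):
        """Parse a simple key=value properties file, skipping blanks and comments."""
        props = {}
        with open(path) as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                key, value = line.split("=", 1)
                props[key.strip()] = value.strip()
        return props

    def write_broker_config(template_path, testcase_json_path, entity_id, out_path):
        """Overlay one entity's overrides from the testcase JSON onto the template."""
        props = load_properties(template_path)
        with open(testcase_json_path) as f:
            testcase = json.load(f)
        entity = next(e for e in testcase["entities"] if e["entity_id"] == entity_id)
        skip = {"entity_id", "log_filename", "config_filename"}  # bookkeeping keys, not broker properties
        for key, value in entity.items():
            if key not in skip:
                props[key] = value
        with open(out_path, "w") as f:
            for key, value in sorted(props.items()):
                f.write("%s=%s\n" % (key, value))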
+zookeeper.connect=localhost:2108 + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=1000000 + +monitoring.period.secs=1 +message.max.bytes=1000000 +queued.max.requests=500 +log.roll.hours=168 +log.index.size.max.bytes=10485760 +log.index.interval.bytes=4096 +auto.create.topics.enable=true +controller.socket.timeout.ms=30000 +controller.message.queue.size=10 +default.replication.factor=3 +replica.lag.time.max.ms=10000 +replica.lag.max.messages=4000 +replica.socket.timeout.ms=30000 +replica.socket.receive.buffer.bytes=65536 +replica.fetch.max.bytes=1048576 +replica.fetch.wait.max.ms=500 +replica.fetch.min.bytes=4096 +num.replica.fetchers=1 + +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=4 + +kafka.csv.metrics.dir=/home/jkoshy/Projects/kafka/system_test/offset_management_testsuite/testcase_7002/logs/broker-2/metrics +kafka.csv.metrics.reporter.enabled=true +kafka.metrics.polling.interval.secs=5 +kafka.metrics.reporters=kafka.metrics.KafkaCSVMetricsReporter diff --git a/system_test/offset_management_testsuite/testcase_7002/config/kafka_server_3.properties b/system_test/offset_management_testsuite/testcase_7002/config/kafka_server_3.properties new file mode 100644 index 0000000000000..e6fbbe1e0532e --- /dev/null +++ b/system_test/offset_management_testsuite/testcase_7002/config/kafka_server_3.properties @@ -0,0 +1,148 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. +broker.id=3 + +# Hostname the broker will advertise to consumers. If not set, kafka will use the value returned +# from InetAddress.getLocalHost(). If there are multiple interfaces getLocalHost +# may not be what you want. +#host.name= + + +############################# Socket Server Settings ############################# + +# The port the socket server listens on +port=9093 + +# The number of threads handling network requests +num.network.threads=2 + +# The number of threads doing disk I/O +num.io.threads=2 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=1048576 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=1048576 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# The directory under which to store log files +log.dir=/tmp/kafka_server_3_logs + +# The default number of log partitions per topic. 
More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions=5 + +# Overrides for for the default given by num.partitions on a per-topic basis +#topic.partition.count.map=topic1:3, topic2:4 + +############################# Log Flush Policy ############################# + +# The following configurations control the flush of data to disk. This is the most +# important performance knob in kafka. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data is at greater risk of loss in the event of a crash. +# 2. Latency: Data is not made available to consumers until it is flushed (which adds latency). +# 3. Throughput: The flush is generally the most expensive operation. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +log.flush.interval.ms=1000 + +# Per-topic overrides for log.flush.interval.ms +#log.flush.intervals.ms.per.topic=topic1:1000, topic2:3000 + +# The interval (in ms) at which logs are checked to see if they need to be flushed to disk. +log.flush.scheduler.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. + +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 +log.retention.bytes=-1 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +#log.segment.size=536870912 +log.segment.bytes=10240 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.cleanup.interval.mins=1 + +############################# Zookeeper ############################# + +# Enable connecting to zookeeper +enable.zookeeper=true + +# Zk connection string (see zk docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. 
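The very small segment sizes throughout this diff (10240 bytes in the generated broker files, 20480 in the testcase JSON overrides that replace the earlier 10240 values) are deliberate: with roughly 500-byte messages a broker rolls a new segment every few dozen messages, so segment rolling is exercised within a short run. A back-of-the-envelope check; the per-message overhead constant is an assumption, not a value taken from this diff:

    SEGMENT_BYTES = 20480        # log.segment.bytes pushed by the testcase JSON overrides
    MESSAGE_SIZE = 500           # producer_performance "message-size"
    PER_MESSAGE_OVERHEAD = 26    # assumed on-disk header overhead per message

    def messages_per_segment(segment_bytes=SEGMENT_BYTES,
                             message_size=MESSAGE_SIZE,
                             overhead=PER_MESSAGE_OVERHEAD):
        return segment_bytes // (message_size + overhead)

    def segments_for(num_messages, **kwargs):
        per_segment = messages_per_segment(**kwargs)
        return -(-num_messages // per_segment)   # ceiling division

    if __name__ == "__main__":
        print(messages_per_segment(), "messages fit in one segment")
        print(segments_for(1000), "segments after a 1000-message run")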
+zookeeper.connect=localhost:2108 + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=1000000 + +monitoring.period.secs=1 +message.max.bytes=1000000 +queued.max.requests=500 +log.roll.hours=168 +log.index.size.max.bytes=10485760 +log.index.interval.bytes=4096 +auto.create.topics.enable=true +controller.socket.timeout.ms=30000 +controller.message.queue.size=10 +default.replication.factor=3 +replica.lag.time.max.ms=10000 +replica.lag.max.messages=4000 +replica.socket.timeout.ms=30000 +replica.socket.receive.buffer.bytes=65536 +replica.fetch.max.bytes=1048576 +replica.fetch.wait.max.ms=500 +replica.fetch.min.bytes=4096 +num.replica.fetchers=1 + +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=4 + +kafka.csv.metrics.dir=/home/jkoshy/Projects/kafka/system_test/offset_management_testsuite/testcase_7002/logs/broker-3/metrics +kafka.csv.metrics.reporter.enabled=true +kafka.metrics.polling.interval.secs=5 +kafka.metrics.reporters=kafka.metrics.KafkaCSVMetricsReporter diff --git a/system_test/offset_management_testsuite/testcase_7002/config/kafka_server_4.properties b/system_test/offset_management_testsuite/testcase_7002/config/kafka_server_4.properties new file mode 100644 index 0000000000000..fee65bce63564 --- /dev/null +++ b/system_test/offset_management_testsuite/testcase_7002/config/kafka_server_4.properties @@ -0,0 +1,148 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. +broker.id=4 + +# Hostname the broker will advertise to consumers. If not set, kafka will use the value returned +# from InetAddress.getLocalHost(). If there are multiple interfaces getLocalHost +# may not be what you want. +#host.name= + + +############################# Socket Server Settings ############################# + +# The port the socket server listens on +port=9094 + +# The number of threads handling network requests +num.network.threads=2 + +# The number of threads doing disk I/O +num.io.threads=2 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=1048576 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=1048576 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# The directory under which to store log files +log.dir=/tmp/kafka_server_4_logs + +# The default number of log partitions per topic. 
More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions=5 + +# Overrides for for the default given by num.partitions on a per-topic basis +#topic.partition.count.map=topic1:3, topic2:4 + +############################# Log Flush Policy ############################# + +# The following configurations control the flush of data to disk. This is the most +# important performance knob in kafka. +# There are a few important trade-offs here: +# 1. Durability: Unflushed data is at greater risk of loss in the event of a crash. +# 2. Latency: Data is not made available to consumers until it is flushed (which adds latency). +# 3. Throughput: The flush is generally the most expensive operation. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +log.flush.interval.ms=1000 + +# Per-topic overrides for log.flush.interval.ms +#log.flush.intervals.ms.per.topic=topic1:1000, topic2:3000 + +# The interval (in ms) at which logs are checked to see if they need to be flushed to disk. +log.flush.scheduler.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. + +# The minimum age of a log file to be eligible for deletion +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining +# segments don't drop below log.retention.bytes. +#log.retention.bytes=1073741824 +log.retention.bytes=-1 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +#log.segment.size=536870912 +log.segment.bytes=10240 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.cleanup.interval.mins=1 + +############################# Zookeeper ############################# + +# Enable connecting to zookeeper +enable.zookeeper=true + +# Zk connection string (see zk docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. 
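The kafka.csv.metrics.dir values baked into these generated files are absolute paths from the workstation they were generated on (/home/jkoshy/...); when the suite runs on another host the harness regenerates the configs, so the checked-in paths are effectively placeholders. A hypothetical helper showing the kind of per-run rewrite involved; it is not the actual kafka_system_test_utils implementation:

    import os

    def rewrite_metrics_dir(config_path, testcase_logs_root, broker_id):
        """Point kafka.csv.metrics.dir at the current run's log tree."""
        target = os.path.join(testcase_logs_root, "broker-%s" % broker_id, "metrics")
        with open(config_path) as f:
            lines = f.readlines()
        with open(config_path, "w") as f:
            for line in lines:
                if line.startswith("kafka.csv.metrics.dir="):
                    line = "kafka.csv.metrics.dir=%s\n" % target
                f.write(line)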
+zookeeper.connect=localhost:2108 + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=1000000 + +monitoring.period.secs=1 +message.max.bytes=1000000 +queued.max.requests=500 +log.roll.hours=168 +log.index.size.max.bytes=10485760 +log.index.interval.bytes=4096 +auto.create.topics.enable=true +controller.socket.timeout.ms=30000 +controller.message.queue.size=10 +default.replication.factor=3 +replica.lag.time.max.ms=10000 +replica.lag.max.messages=4000 +replica.socket.timeout.ms=30000 +replica.socket.receive.buffer.bytes=65536 +replica.fetch.max.bytes=1048576 +replica.fetch.wait.max.ms=500 +replica.fetch.min.bytes=4096 +num.replica.fetchers=1 + +offsets.topic.num.partitions=2 +offsets.topic.replication.factor=4 + +kafka.csv.metrics.dir=/home/jkoshy/Projects/kafka/system_test/offset_management_testsuite/testcase_7002/logs/broker-4/metrics +kafka.csv.metrics.reporter.enabled=true +kafka.metrics.polling.interval.secs=5 +kafka.metrics.reporters=kafka.metrics.KafkaCSVMetricsReporter diff --git a/system_test/mirror_maker/config/zookeeper_source_2.properties b/system_test/offset_management_testsuite/testcase_7002/config/zookeeper_0.properties similarity index 79% rename from system_test/mirror_maker/config/zookeeper_source_2.properties rename to system_test/offset_management_testsuite/testcase_7002/config/zookeeper_0.properties index d534d180daf88..97c07b9cd470d 100644 --- a/system_test/mirror_maker/config/zookeeper_source_2.properties +++ b/system_test/offset_management_testsuite/testcase_7002/config/zookeeper_0.properties @@ -13,6 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # the directory where the snapshot is stored. -dataDir=/tmp/zookeeper_source-2 +dataDir=/tmp/zookeeper_0 # the port at which the clients will connect -clientPort=2182 +clientPort=2108 +# disable the per-ip limit on the number of connections since this is a non-production config +maxClientCnxns=0 +syncLimit=5 +initLimit=10 +tickTime=2000 +server.1=localhost:2107:2109 diff --git a/system_test/offset_management_testsuite/testcase_7002/testcase_7002_properties.json b/system_test/offset_management_testsuite/testcase_7002/testcase_7002_properties.json new file mode 100644 index 0000000000000..c5866a2ecc42e --- /dev/null +++ b/system_test/offset_management_testsuite/testcase_7002/testcase_7002_properties.json @@ -0,0 +1,127 @@ +{ + "description": {"01":"To Test : 'Basic offset management test.'", + "02":"Set up a Zk and Kafka cluster.", + "03":"Produce messages to a multiple topics - various partition counts.", + "04":"Start multiple consumer groups to read various subsets of above topics.", + "05":"Bounce consumers.", + "06":"Verify that there are no duplicate messages or lost messages on any consumer group.", + "07":"Producer dimensions : mode:sync, acks:-1, comp:0" + }, + "testcase_args": { + "bounce_leaders": "false", + "bounce_consumers": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50", + "num_topics_for_auto_generated_string":"3" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2108", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_0.log", + "config_filename": "zookeeper_0.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_1_logs", + 
"default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_1.log", + "config_filename": "kafka_server_1.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_2.log", + "config_filename": "kafka_server_2.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_3.log", + "config_filename": "kafka_server_3.properties" + }, + { + "entity_id": "4", + "port": "9094", + "broker.id": "4", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_4_logs", + "default.replication.factor": "3", + "num.partitions": "5", + "log_filename": "kafka_server_4.log", + "config_filename": "kafka_server_4.properties" + }, + { + "entity_id": "5", + "topic": "test", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "1000", + "request-num-acks": "-1", + "sync":"true", + "producer-num-retries":"5", + "log_filename": "producer_performance_10.log", + "config_filename": "producer_performance_10.properties" + }, + { + "entity_id": "6", + "topic": "test_0001", + "group.id": "group1", + "consumer-timeout-ms": "30000", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer_6.properties" + }, + { + "entity_id": "7", + "topic": "test_0002", + "group.id": "group2", + "consumer-timeout-ms": "30000", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer_7.properties" + }, + { + "entity_id": "8", + "topic": "test_0002", + "group.id": "group2", + "consumer-timeout-ms": "30000", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer_8.properties" + }, + { + "entity_id": "9", + "topic": "test_0002", + "group.id": "group2", + "consumer-timeout-ms": "30000", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer_9.properties" + }, + { + "entity_id": "10", + "topic": "test_0003", + "group.id": "group2", + "consumer-timeout-ms": "30000", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer_10.properties" + } + ] +} diff --git a/system_test/producer_perf/bin/run-compression-test.sh b/system_test/producer_perf/bin/run-compression-test.sh index ea20f0dbd803e..5297d1f93e3fa 100755 --- a/system_test/producer_perf/bin/run-compression-test.sh +++ b/system_test/producer_perf/bin/run-compression-test.sh @@ -5,9 +5,9 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -28,9 +28,9 @@ $base_dir/../../bin/kafka-server-start.sh $base_dir/config/server.properties 2>& sleep 4 echo "start producing $num_messages messages ..." 
-$base_dir/../../bin/kafka-run-class.sh kafka.perf.ProducerPerformance --brokerinfo broker.list=0:localhost:9092 --topics test01 --messages $num_messages --message-size $message_size --batch-size 200 --threads 1 --reporting-interval 100000 num_messages --async --compression-codec 1 +$base_dir/../../bin/kafka-run-class.sh kafka.tools.ProducerPerformance --brokerinfo broker.list=0:localhost:9092 --topics test01 --messages $num_messages --message-size $message_size --batch-size 200 --threads 1 --reporting-interval 100000 num_messages --async --compression-codec 1 -echo "wait for data to be persisted" +echo "wait for data to be persisted" cur_offset="-1" quit=0 while [ $quit -eq 0 ] @@ -59,4 +59,3 @@ fi ps ax | grep -i 'kafka.kafka' | grep -v grep | awk '{print $1}' | xargs kill -15 > /dev/null sleep 2 ps ax | grep -i 'QuorumPeerMain' | grep -v grep | awk '{print $1}' | xargs kill -15 > /dev/null - diff --git a/system_test/producer_perf/bin/run-test.sh b/system_test/producer_perf/bin/run-test.sh index bb60817edda2b..9a3b8858a9b27 100755 --- a/system_test/producer_perf/bin/run-test.sh +++ b/system_test/producer_perf/bin/run-test.sh @@ -5,9 +5,9 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -28,9 +28,9 @@ $base_dir/../../bin/kafka-server-start.sh $base_dir/config/server.properties 2>& sleep 4 echo "start producing $num_messages messages ..." -$base_dir/../../bin/kafka-run-class.sh kafka.perf.ProducerPerformance --brokerinfo broker.list=0:localhost:9092 --topics test01 --messages $num_messages --message-size $message_size --batch-size 200 --threads 1 --reporting-interval 100000 num_messages --async +$base_dir/../../bin/kafka-run-class.sh kafka.tools.ProducerPerformance --brokerinfo broker.list=0:localhost:9092 --topics test01 --messages $num_messages --message-size $message_size --batch-size 200 --threads 1 --reporting-interval 100000 num_messages --async -echo "wait for data to be persisted" +echo "wait for data to be persisted" cur_offset="-1" quit=0 while [ $quit -eq 0 ] @@ -59,4 +59,3 @@ fi ps ax | grep -i 'kafka.kafka' | grep -v grep | awk '{print $1}' | xargs kill -15 > /dev/null sleep 2 ps ax | grep -i 'QuorumPeerMain' | grep -v grep | awk '{print $1}' | xargs kill -15 > /dev/null - diff --git a/system_test/producer_perf/config/server.properties b/system_test/producer_perf/config/server.properties index 9f8a633ab2f92..83a1e06794c7f 100644 --- a/system_test/producer_perf/config/server.properties +++ b/system_test/producer_perf/config/server.properties @@ -60,10 +60,10 @@ enable.zookeeper=true # zk connection string # comma separated host:port pairs, each corresponding to a zk # server. e.g. 
"127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" -zk.connect=localhost:2181 +zookeeper.connect=localhost:2181 # timeout in ms for connecting to zookeeper -zk.connection.timeout.ms=1000000 +zookeeper.connection.timeout.ms=1000000 # time based topic flush intervals in ms #log.flush.intervals.ms.per.topic=topic:1000 diff --git a/system_test/replication_testsuite/config/server.properties b/system_test/replication_testsuite/config/server.properties index c6284122e3dfa..6becbab60e394 100644 --- a/system_test/replication_testsuite/config/server.properties +++ b/system_test/replication_testsuite/config/server.properties @@ -136,5 +136,5 @@ replica.socket.timeout.ms=30000 replica.socket.receive.buffer.bytes=65536 replica.fetch.max.bytes=1048576 replica.fetch.wait.max.ms=500 -replica.fetch.min.bytes=4096 +replica.fetch.min.bytes=1 num.replica.fetchers=1 diff --git a/system_test/replication_testsuite/replica_basic_test.py b/system_test/replication_testsuite/replica_basic_test.py index e20130b85aecd..16a24a407051a 100644 --- a/system_test/replication_testsuite/replica_basic_test.py +++ b/system_test/replication_testsuite/replica_basic_test.py @@ -188,7 +188,7 @@ def runTest(self): if autoCreateTopic.lower() == "false": self.log_message("creating topics") - kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv) + kafka_system_test_utils.create_topic_for_producer_performance(self.systemTestEnv, self.testcaseEnv) self.anonLogger.info("sleeping for 5s") time.sleep(5) @@ -363,6 +363,7 @@ def runTest(self): str(self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]) + "]", extra=self.d) if self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]: time.sleep(1) + self.testcaseEnv.lock.release() self.logger.info("all producer threads completed", extra=self.d) break time.sleep(1) @@ -450,6 +451,8 @@ def runTest(self): except Exception as e: self.log_message("Exception while running test {0}".format(e)) traceback.print_exc() + self.testcaseEnv.validationStatusDict["Test completed"] = "FAILED" + finally: if not skipThisTestCase and not self.systemTestEnv.printTestDescriptionsOnly: diff --git a/system_test/replication_testsuite/testcase_0001/testcase_0001_properties.json b/system_test/replication_testsuite/testcase_0001/testcase_0001_properties.json index 9bc164bee1dd0..250ffe08ae4c2 100644 --- a/system_test/replication_testsuite/testcase_0001/testcase_0001_properties.json +++ b/system_test/replication_testsuite/testcase_0001/testcase_0001_properties.json @@ -5,7 +5,7 @@ "04":"At the end it verifies the log size and contents", "05":"Use a consumer to verify no message loss.", "06":"Producer dimensions : mode:sync, acks:-1, comp:0", - "07":"Log segment size : 10240" + "07":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -29,7 +29,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -40,7 +40,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -51,7 +51,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -60,6 +60,7 @@ }, { "entity_id": "4", + 
"new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0002/testcase_0002_properties.json b/system_test/replication_testsuite/testcase_0002/testcase_0002_properties.json index c90d7536a7564..3f9e7d05db17e 100644 --- a/system_test/replication_testsuite/testcase_0002/testcase_0002_properties.json +++ b/system_test/replication_testsuite/testcase_0002/testcase_0002_properties.json @@ -5,7 +5,7 @@ "04":"At the end it verifies the log size and contents", "05":"Use a consumer to verify no message loss.", "06":"Producer dimensions : mode:sync, acks:-1, comp:1", - "07":"Log segment size : 10240" + "07":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -29,7 +29,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -40,7 +40,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -51,7 +51,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -60,6 +60,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0003/testcase_0003_properties.json b/system_test/replication_testsuite/testcase_0003/testcase_0003_properties.json index b62b8aa50a38c..b10c6266a786a 100644 --- a/system_test/replication_testsuite/testcase_0003/testcase_0003_properties.json +++ b/system_test/replication_testsuite/testcase_0003/testcase_0003_properties.json @@ -5,7 +5,7 @@ "04":"At the end it verifies the log size and contents", "05":"Use a consumer to verify no message loss.", "06":"Producer dimensions : mode:sync, acks:1, comp:1", - "07":"Log segment size : 10240" + "07":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -29,7 +29,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -40,7 +40,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -51,7 +51,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -60,6 +60,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0004/testcase_0004_properties.json b/system_test/replication_testsuite/testcase_0004/testcase_0004_properties.json index b91cef88533cb..22c08023f3d9e 100644 --- a/system_test/replication_testsuite/testcase_0004/testcase_0004_properties.json +++ b/system_test/replication_testsuite/testcase_0004/testcase_0004_properties.json @@ -5,7 +5,7 @@ "04":"At the end it verifies the log size and contents", "05":"Use a consumer to 
verify no message loss.", "06":"Producer dimensions : mode:async, acks:-1, comp:1", - "07":"Log segment size : 10240" + "07":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -29,7 +29,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -40,7 +40,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -51,7 +51,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -60,6 +60,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0005/testcase_0005_properties.json b/system_test/replication_testsuite/testcase_0005/testcase_0005_properties.json index 4b3f76a21642d..1317d8460f82c 100644 --- a/system_test/replication_testsuite/testcase_0005/testcase_0005_properties.json +++ b/system_test/replication_testsuite/testcase_0005/testcase_0005_properties.json @@ -5,7 +5,7 @@ "04":"At the end it verifies the log size and contents", "05":"Use a consumer to verify no message loss.", "06":"Producer dimensions : mode:async, acks:1, comp:1", - "07":"Log segment size : 10240" + "07":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -29,7 +29,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -40,7 +40,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -51,7 +51,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -60,6 +60,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0006/testcase_0006_properties.json b/system_test/replication_testsuite/testcase_0006/testcase_0006_properties.json index b9b3485c46b61..d31346506a19d 100644 --- a/system_test/replication_testsuite/testcase_0006/testcase_0006_properties.json +++ b/system_test/replication_testsuite/testcase_0006/testcase_0006_properties.json @@ -5,7 +5,7 @@ "04":"At the end it verifies the log size and contents", "05":"Use a consumer to verify no message loss.", "06":"Producer dimensions : mode:sync, acks:-1, comp:1", - "07":"Log segment size : 10240" + "07":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -29,7 +29,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -40,7 +40,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": 
"20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -51,7 +51,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -60,6 +60,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0007/testcase_0007_properties.json b/system_test/replication_testsuite/testcase_0007/testcase_0007_properties.json index 5c4351fad98ef..bd00f130d7fc6 100644 --- a/system_test/replication_testsuite/testcase_0007/testcase_0007_properties.json +++ b/system_test/replication_testsuite/testcase_0007/testcase_0007_properties.json @@ -5,7 +5,7 @@ "04":"At the end it verifies the log size and contents", "05":"Use a consumer to verify no message loss.", "06":"Producer dimensions : mode:async, acks:-1, comp:1", - "07":"Log segment size : 10240" + "07":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -29,7 +29,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -40,7 +40,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -51,7 +51,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -60,6 +60,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0008/testcase_0008_properties.json b/system_test/replication_testsuite/testcase_0008/testcase_0008_properties.json index 79cfed8bc63e4..ac5c13b09a84f 100644 --- a/system_test/replication_testsuite/testcase_0008/testcase_0008_properties.json +++ b/system_test/replication_testsuite/testcase_0008/testcase_0008_properties.json @@ -5,7 +5,7 @@ "04":"At the end it verifies the log size and contents", "05":"Use a consumer to verify no message loss.", "06":"Producer dimensions : mode:sync, acks:1, comp:1", - "07":"Log segment size : 10240" + "07":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -29,7 +29,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -40,7 +40,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -51,7 +51,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -60,6 +60,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0009/testcase_0009_properties.json 
b/system_test/replication_testsuite/testcase_0009/testcase_0009_properties.json index a52b709579f58..6dadd7a288664 100644 --- a/system_test/replication_testsuite/testcase_0009/testcase_0009_properties.json +++ b/system_test/replication_testsuite/testcase_0009/testcase_0009_properties.json @@ -5,7 +5,7 @@ "04":"At the end it verifies the log size and contents", "05":"Use a consumer to verify no message loss.", "06":"Producer dimensions : mode:async, acks:1, comp:1", - "07":"Log segment size : 10240" + "07":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -29,7 +29,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -40,7 +40,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -51,7 +51,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -60,6 +60,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0010/testcase_0010_properties.json b/system_test/replication_testsuite/testcase_0010/testcase_0010_properties.json index 8d4b5fea2d1f9..e25ddb917dd1a 100644 --- a/system_test/replication_testsuite/testcase_0010/testcase_0010_properties.json +++ b/system_test/replication_testsuite/testcase_0010/testcase_0010_properties.json @@ -60,6 +60,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0011/testcase_0011_properties.json b/system_test/replication_testsuite/testcase_0011/testcase_0011_properties.json index b03f9cf059899..ac17570034037 100644 --- a/system_test/replication_testsuite/testcase_0011/testcase_0011_properties.json +++ b/system_test/replication_testsuite/testcase_0011/testcase_0011_properties.json @@ -61,6 +61,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0021/testcase_0021_properties.json b/system_test/replication_testsuite/testcase_0021/testcase_0021_properties.json index 40c2f8df13051..f35a439782057 100644 --- a/system_test/replication_testsuite/testcase_0021/testcase_0021_properties.json +++ b/system_test/replication_testsuite/testcase_0021/testcase_0021_properties.json @@ -59,6 +59,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -72,6 +73,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0022/testcase_0022_properties.json b/system_test/replication_testsuite/testcase_0022/testcase_0022_properties.json index 7a3046035329f..5a168f3f5cfab 100644 --- a/system_test/replication_testsuite/testcase_0022/testcase_0022_properties.json +++ b/system_test/replication_testsuite/testcase_0022/testcase_0022_properties.json @@ -59,6 +59,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", 
"compression-codec": "0", @@ -72,6 +73,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0023/testcase_0023_properties.json b/system_test/replication_testsuite/testcase_0023/testcase_0023_properties.json index d921f01f332a4..09d81a6584a0c 100644 --- a/system_test/replication_testsuite/testcase_0023/testcase_0023_properties.json +++ b/system_test/replication_testsuite/testcase_0023/testcase_0023_properties.json @@ -59,6 +59,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -72,6 +73,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0024/testcase_0024_properties.json b/system_test/replication_testsuite/testcase_0024/testcase_0024_properties.json index 839eb1476981e..5661b88b14fdb 100644 --- a/system_test/replication_testsuite/testcase_0024/testcase_0024_properties.json +++ b/system_test/replication_testsuite/testcase_0024/testcase_0024_properties.json @@ -62,6 +62,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1,test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0101/testcase_0101_properties.json b/system_test/replication_testsuite/testcase_0101/testcase_0101_properties.json index 85e4b61751b12..614cb1cf01d3c 100644 --- a/system_test/replication_testsuite/testcase_0101/testcase_0101_properties.json +++ b/system_test/replication_testsuite/testcase_0101/testcase_0101_properties.json @@ -8,7 +8,7 @@ "07":"At the end it verifies the log size and contents", "08":"Use a consumer to verify no message loss.", "09":"Producer dimensions : mode:sync, acks:-1, comp:0", - "10":"Log segment size : 10240" + "10":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -32,7 +32,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -43,7 +43,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -54,7 +54,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0102/testcase_0102_properties.json b/system_test/replication_testsuite/testcase_0102/testcase_0102_properties.json index 0d2f59fbcaa30..fbf01772545f0 100644 --- a/system_test/replication_testsuite/testcase_0102/testcase_0102_properties.json +++ b/system_test/replication_testsuite/testcase_0102/testcase_0102_properties.json @@ -8,7 +8,7 @@ "07":"At the end it verifies the log size and contents", "08":"Use a consumer to verify no message loss.", "09":"Producer dimensions : mode:sync, acks:-1, comp:1", - "10":"Log segment size : 10240" + "10":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -32,7 +32,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - 
"log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -43,7 +43,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -54,7 +54,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0103/testcase_0103_properties.json b/system_test/replication_testsuite/testcase_0103/testcase_0103_properties.json index 34acfa9d5c684..317a6e3d33840 100644 --- a/system_test/replication_testsuite/testcase_0103/testcase_0103_properties.json +++ b/system_test/replication_testsuite/testcase_0103/testcase_0103_properties.json @@ -8,7 +8,7 @@ "07":"At the end it verifies the log size and contents", "08":"Use a consumer to verify no message loss.", "09":"Producer dimensions : mode:sync, acks:1, comp:1", - "10":"Log segment size : 10240" + "10":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -32,7 +32,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -43,7 +43,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -54,7 +54,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0104/testcase_0104_properties.json b/system_test/replication_testsuite/testcase_0104/testcase_0104_properties.json index 4145345fc433a..d1a790b34fa5b 100644 --- a/system_test/replication_testsuite/testcase_0104/testcase_0104_properties.json +++ b/system_test/replication_testsuite/testcase_0104/testcase_0104_properties.json @@ -8,7 +8,7 @@ "07":"At the end it verifies the log size and contents", "08":"Use a consumer to verify no message loss.", "09":"Producer dimensions : mode:async, acks:-1, comp:1", - "10":"Log segment size : 10240" + "10":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -32,7 +32,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -43,7 +43,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -54,7 +54,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", 
"default.replication.factor": "3", "num.partitions": "1", @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0105/testcase_0105_properties.json b/system_test/replication_testsuite/testcase_0105/testcase_0105_properties.json index 2eecc768e7e97..8f4f8bfac7146 100644 --- a/system_test/replication_testsuite/testcase_0105/testcase_0105_properties.json +++ b/system_test/replication_testsuite/testcase_0105/testcase_0105_properties.json @@ -8,7 +8,7 @@ "07":"At the end it verifies the log size and contents", "08":"Use a consumer to verify no message loss.", "09":"Producer dimensions : mode:async, acks:1, comp:1", - "10":"Log segment size : 10240" + "10":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -32,7 +32,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -43,7 +43,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -54,7 +54,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "1", @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0106/testcase_0106_properties.json b/system_test/replication_testsuite/testcase_0106/testcase_0106_properties.json index 744174eab1c82..a307f855c9449 100644 --- a/system_test/replication_testsuite/testcase_0106/testcase_0106_properties.json +++ b/system_test/replication_testsuite/testcase_0106/testcase_0106_properties.json @@ -8,7 +8,7 @@ "07":"At the end it verifies the log size and contents", "08":"Use a consumer to verify no message loss.", "09":"Producer dimensions : mode:sync, acks:-1, comp:1", - "10":"Log segment size : 10240" + "10":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -32,7 +32,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -43,7 +43,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -54,7 +54,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0107/testcase_0107_properties.json b/system_test/replication_testsuite/testcase_0107/testcase_0107_properties.json index e881b138c1fc0..827319eb6c6db 100644 --- a/system_test/replication_testsuite/testcase_0107/testcase_0107_properties.json +++ 
b/system_test/replication_testsuite/testcase_0107/testcase_0107_properties.json @@ -8,7 +8,7 @@ "07":"At the end it verifies the log size and contents", "08":"Use a consumer to verify no message loss.", "09":"Producer dimensions : mode:async, acks:-1, comp:1", - "10":"Log segment size : 10240" + "10":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -32,7 +32,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -43,7 +43,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -54,7 +54,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0108/testcase_0108_properties.json b/system_test/replication_testsuite/testcase_0108/testcase_0108_properties.json index 7b48fdb441ad5..ae015cd0eb82b 100644 --- a/system_test/replication_testsuite/testcase_0108/testcase_0108_properties.json +++ b/system_test/replication_testsuite/testcase_0108/testcase_0108_properties.json @@ -8,7 +8,7 @@ "07":"At the end it verifies the log size and contents", "08":"Use a consumer to verify no message loss.", "09":"Producer dimensions : mode:sync, acks:1, comp:1", - "10":"Log segment size : 10240" + "10":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -32,7 +32,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -43,7 +43,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -54,7 +54,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0109/testcase_0109_properties.json b/system_test/replication_testsuite/testcase_0109/testcase_0109_properties.json index a98ae0390bf55..1951a1220956f 100644 --- a/system_test/replication_testsuite/testcase_0109/testcase_0109_properties.json +++ b/system_test/replication_testsuite/testcase_0109/testcase_0109_properties.json @@ -8,7 +8,7 @@ "07":"At the end it verifies the log size and contents", "08":"Use a consumer to verify no message loss.", "09":"Producer dimensions : mode:async, acks:1, comp:1", - "10":"Log segment size : 10240" + "10":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -32,7 +32,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", 
"default.replication.factor": "3", "num.partitions": "3", @@ -43,7 +43,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -54,7 +54,7 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "default.replication.factor": "3", "num.partitions": "3", @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0110/testcase_0110_properties.json b/system_test/replication_testsuite/testcase_0110/testcase_0110_properties.json index f51abc156ae63..f11c705d92ca3 100644 --- a/system_test/replication_testsuite/testcase_0110/testcase_0110_properties.json +++ b/system_test/replication_testsuite/testcase_0110/testcase_0110_properties.json @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0111/testcase_0111_properties.json b/system_test/replication_testsuite/testcase_0111/testcase_0111_properties.json index fff0d689eba5f..cc1eae690f67a 100644 --- a/system_test/replication_testsuite/testcase_0111/testcase_0111_properties.json +++ b/system_test/replication_testsuite/testcase_0111/testcase_0111_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0112/testcase_0112_properties.json b/system_test/replication_testsuite/testcase_0112/testcase_0112_properties.json index 636f02a2342d8..48a6c9d2351a5 100644 --- a/system_test/replication_testsuite/testcase_0112/testcase_0112_properties.json +++ b/system_test/replication_testsuite/testcase_0112/testcase_0112_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0113/testcase_0113_properties.json b/system_test/replication_testsuite/testcase_0113/testcase_0113_properties.json index bdb885e43829e..a88b49b4363df 100644 --- a/system_test/replication_testsuite/testcase_0113/testcase_0113_properties.json +++ b/system_test/replication_testsuite/testcase_0113/testcase_0113_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0114/testcase_0114_properties.json b/system_test/replication_testsuite/testcase_0114/testcase_0114_properties.json index ca51c5f5f183f..1261614396166 100644 --- a/system_test/replication_testsuite/testcase_0114/testcase_0114_properties.json +++ b/system_test/replication_testsuite/testcase_0114/testcase_0114_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0115/testcase_0115_properties.json b/system_test/replication_testsuite/testcase_0115/testcase_0115_properties.json index 459becf251d7c..2d649da83b01e 100644 --- a/system_test/replication_testsuite/testcase_0115/testcase_0115_properties.json +++ 
b/system_test/replication_testsuite/testcase_0115/testcase_0115_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0116/testcase_0116_properties.json b/system_test/replication_testsuite/testcase_0116/testcase_0116_properties.json index c9471bc62ac6a..cbad6f2b7b9f7 100644 --- a/system_test/replication_testsuite/testcase_0116/testcase_0116_properties.json +++ b/system_test/replication_testsuite/testcase_0116/testcase_0116_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0117/testcase_0117_properties.json b/system_test/replication_testsuite/testcase_0117/testcase_0117_properties.json index 8159464b16c2c..0099a8f5db4e9 100644 --- a/system_test/replication_testsuite/testcase_0117/testcase_0117_properties.json +++ b/system_test/replication_testsuite/testcase_0117/testcase_0117_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0118/testcase_0118_properties.json b/system_test/replication_testsuite/testcase_0118/testcase_0118_properties.json index f73f0e43a3cf7..6954d4396276a 100644 --- a/system_test/replication_testsuite/testcase_0118/testcase_0118_properties.json +++ b/system_test/replication_testsuite/testcase_0118/testcase_0118_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0119/testcase_0119_properties.json b/system_test/replication_testsuite/testcase_0119/testcase_0119_properties.json index 442a481230b15..ab1e47a87c624 100644 --- a/system_test/replication_testsuite/testcase_0119/testcase_0119_properties.json +++ b/system_test/replication_testsuite/testcase_0119/testcase_0119_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0121/testcase_0121_properties.json b/system_test/replication_testsuite/testcase_0121/testcase_0121_properties.json index 606aad399dd3a..c7940c460566c 100644 --- a/system_test/replication_testsuite/testcase_0121/testcase_0121_properties.json +++ b/system_test/replication_testsuite/testcase_0121/testcase_0121_properties.json @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -76,6 +77,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0122/testcase_0122_properties.json b/system_test/replication_testsuite/testcase_0122/testcase_0122_properties.json index ea475361c8c54..35daf5b4cc3d6 100644 --- a/system_test/replication_testsuite/testcase_0122/testcase_0122_properties.json +++ b/system_test/replication_testsuite/testcase_0122/testcase_0122_properties.json @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -76,6 +77,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git 
a/system_test/replication_testsuite/testcase_0123/testcase_0123_properties.json b/system_test/replication_testsuite/testcase_0123/testcase_0123_properties.json index f9c92733cf465..fe5e49af72d82 100644 --- a/system_test/replication_testsuite/testcase_0123/testcase_0123_properties.json +++ b/system_test/replication_testsuite/testcase_0123/testcase_0123_properties.json @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -76,6 +77,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0124/testcase_0124_properties.json b/system_test/replication_testsuite/testcase_0124/testcase_0124_properties.json index 02f8506b20d34..bff5d7342fc83 100644 --- a/system_test/replication_testsuite/testcase_0124/testcase_0124_properties.json +++ b/system_test/replication_testsuite/testcase_0124/testcase_0124_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0125/testcase_0125_properties.json b/system_test/replication_testsuite/testcase_0125/testcase_0125_properties.json index 00482791aa969..1f57ecc0adac1 100644 --- a/system_test/replication_testsuite/testcase_0125/testcase_0125_properties.json +++ b/system_test/replication_testsuite/testcase_0125/testcase_0125_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0126/testcase_0126_properties.json b/system_test/replication_testsuite/testcase_0126/testcase_0126_properties.json index 5119e61bbfca3..ffa0fc31fc972 100644 --- a/system_test/replication_testsuite/testcase_0126/testcase_0126_properties.json +++ b/system_test/replication_testsuite/testcase_0126/testcase_0126_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0127/testcase_0127_properties.json b/system_test/replication_testsuite/testcase_0127/testcase_0127_properties.json index 8b53fa7df5056..78ecd8fa3f871 100644 --- a/system_test/replication_testsuite/testcase_0127/testcase_0127_properties.json +++ b/system_test/replication_testsuite/testcase_0127/testcase_0127_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0128/testcase_0128_properties.json b/system_test/replication_testsuite/testcase_0128/testcase_0128_properties.json index e8edb9f4c7009..589eb2068d6eb 100644 --- a/system_test/replication_testsuite/testcase_0128/testcase_0128_properties.json +++ b/system_test/replication_testsuite/testcase_0128/testcase_0128_properties.json @@ -65,6 
+65,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1,test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0131/testcase_0131_properties.json b/system_test/replication_testsuite/testcase_0131/testcase_0131_properties.json index a140882287b9c..0324b6f327cb7 100644 --- a/system_test/replication_testsuite/testcase_0131/testcase_0131_properties.json +++ b/system_test/replication_testsuite/testcase_0131/testcase_0131_properties.json @@ -66,6 +66,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -79,6 +80,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0132/testcase_0132_properties.json b/system_test/replication_testsuite/testcase_0132/testcase_0132_properties.json index 48b30c7e014f0..83bcaaac3a6e5 100644 --- a/system_test/replication_testsuite/testcase_0132/testcase_0132_properties.json +++ b/system_test/replication_testsuite/testcase_0132/testcase_0132_properties.json @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -76,6 +77,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0133/testcase_0133_properties.json b/system_test/replication_testsuite/testcase_0133/testcase_0133_properties.json index 8276aae0aa822..2a1eaa51efb58 100644 --- a/system_test/replication_testsuite/testcase_0133/testcase_0133_properties.json +++ b/system_test/replication_testsuite/testcase_0133/testcase_0133_properties.json @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -76,6 +77,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0134/testcase_0134_properties.json b/system_test/replication_testsuite/testcase_0134/testcase_0134_properties.json index 73bb859952262..0a98ce5bb4ffe 100644 --- a/system_test/replication_testsuite/testcase_0134/testcase_0134_properties.json +++ b/system_test/replication_testsuite/testcase_0134/testcase_0134_properties.json @@ -68,6 +68,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1,test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0151/testcase_0151_properties.json b/system_test/replication_testsuite/testcase_0151/testcase_0151_properties.json index eebba4db5d978..237a34388f2bc 100644 --- a/system_test/replication_testsuite/testcase_0151/testcase_0151_properties.json +++ b/system_test/replication_testsuite/testcase_0151/testcase_0151_properties.json @@ -65,6 +65,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0152/testcase_0152_properties.json b/system_test/replication_testsuite/testcase_0152/testcase_0152_properties.json index debf544b3541a..8d57610851775 100644 --- a/system_test/replication_testsuite/testcase_0152/testcase_0152_properties.json +++ b/system_test/replication_testsuite/testcase_0152/testcase_0152_properties.json @@ -65,6 +65,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", 
"compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0153/testcase_0153_properties.json b/system_test/replication_testsuite/testcase_0153/testcase_0153_properties.json index 57b7d985236c8..89b933f674357 100644 --- a/system_test/replication_testsuite/testcase_0153/testcase_0153_properties.json +++ b/system_test/replication_testsuite/testcase_0153/testcase_0153_properties.json @@ -65,6 +65,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0154/testcase_0154_properties.json b/system_test/replication_testsuite/testcase_0154/testcase_0154_properties.json index c09fab719459d..fe3f98fb36b5d 100644 --- a/system_test/replication_testsuite/testcase_0154/testcase_0154_properties.json +++ b/system_test/replication_testsuite/testcase_0154/testcase_0154_properties.json @@ -65,6 +65,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0155/testcase_0155_properties.json b/system_test/replication_testsuite/testcase_0155/testcase_0155_properties.json index dd5ac52f86b02..7f9ced8990704 100644 --- a/system_test/replication_testsuite/testcase_0155/testcase_0155_properties.json +++ b/system_test/replication_testsuite/testcase_0155/testcase_0155_properties.json @@ -65,6 +65,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0156/testcase_0156_properties.json b/system_test/replication_testsuite/testcase_0156/testcase_0156_properties.json index 8236ca58994b5..ec1e83cc11dc8 100644 --- a/system_test/replication_testsuite/testcase_0156/testcase_0156_properties.json +++ b/system_test/replication_testsuite/testcase_0156/testcase_0156_properties.json @@ -65,6 +65,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0157/testcase_0157_properties.json b/system_test/replication_testsuite/testcase_0157/testcase_0157_properties.json index a28bf81e2b79e..e96ed325d7724 100644 --- a/system_test/replication_testsuite/testcase_0157/testcase_0157_properties.json +++ b/system_test/replication_testsuite/testcase_0157/testcase_0157_properties.json @@ -65,6 +65,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0158/testcase_0158_properties.json b/system_test/replication_testsuite/testcase_0158/testcase_0158_properties.json index 3d6edbdb16c58..7ca29427d9c58 100644 --- a/system_test/replication_testsuite/testcase_0158/testcase_0158_properties.json +++ b/system_test/replication_testsuite/testcase_0158/testcase_0158_properties.json @@ -65,6 +65,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0159/testcase_0159_properties.json b/system_test/replication_testsuite/testcase_0159/testcase_0159_properties.json index 030c9e8f0560f..cf7ccc3911ea0 100644 --- a/system_test/replication_testsuite/testcase_0159/testcase_0159_properties.json +++ b/system_test/replication_testsuite/testcase_0159/testcase_0159_properties.json @@ -65,6 +65,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": 
"5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0201/testcase_0201_properties.json b/system_test/replication_testsuite/testcase_0201/testcase_0201_properties.json index c6f8a23f358c5..521592b4d235c 100644 --- a/system_test/replication_testsuite/testcase_0201/testcase_0201_properties.json +++ b/system_test/replication_testsuite/testcase_0201/testcase_0201_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0202/testcase_0202_properties.json b/system_test/replication_testsuite/testcase_0202/testcase_0202_properties.json index 7b1a4c25c73fb..c2feeb870ae9a 100644 --- a/system_test/replication_testsuite/testcase_0202/testcase_0202_properties.json +++ b/system_test/replication_testsuite/testcase_0202/testcase_0202_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0203/testcase_0203_properties.json b/system_test/replication_testsuite/testcase_0203/testcase_0203_properties.json index 47276a85a038e..83b4dbceb26e0 100644 --- a/system_test/replication_testsuite/testcase_0203/testcase_0203_properties.json +++ b/system_test/replication_testsuite/testcase_0203/testcase_0203_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0204/testcase_0204_properties.json b/system_test/replication_testsuite/testcase_0204/testcase_0204_properties.json index 3742cfad8f251..629b7baf43fff 100644 --- a/system_test/replication_testsuite/testcase_0204/testcase_0204_properties.json +++ b/system_test/replication_testsuite/testcase_0204/testcase_0204_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0205/testcase_0205_properties.json b/system_test/replication_testsuite/testcase_0205/testcase_0205_properties.json index d41672d0b2818..a9d13c380ddc1 100644 --- a/system_test/replication_testsuite/testcase_0205/testcase_0205_properties.json +++ b/system_test/replication_testsuite/testcase_0205/testcase_0205_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0206/testcase_0206_properties.json b/system_test/replication_testsuite/testcase_0206/testcase_0206_properties.json index a32d8882deb14..e31666917ab32 100644 --- a/system_test/replication_testsuite/testcase_0206/testcase_0206_properties.json +++ b/system_test/replication_testsuite/testcase_0206/testcase_0206_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0207/testcase_0207_properties.json b/system_test/replication_testsuite/testcase_0207/testcase_0207_properties.json index 573787099f92c..1e08f46af4578 100644 --- a/system_test/replication_testsuite/testcase_0207/testcase_0207_properties.json +++ b/system_test/replication_testsuite/testcase_0207/testcase_0207_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", 
"threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0208/testcase_0208_properties.json b/system_test/replication_testsuite/testcase_0208/testcase_0208_properties.json index c3d1d2cb90ce2..1dd38f478a820 100644 --- a/system_test/replication_testsuite/testcase_0208/testcase_0208_properties.json +++ b/system_test/replication_testsuite/testcase_0208/testcase_0208_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0209/testcase_0209_properties.json b/system_test/replication_testsuite/testcase_0209/testcase_0209_properties.json index 8d2cebafa0fb3..ac6b4d0013b66 100644 --- a/system_test/replication_testsuite/testcase_0209/testcase_0209_properties.json +++ b/system_test/replication_testsuite/testcase_0209/testcase_0209_properties.json @@ -65,6 +65,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0251/testcase_0251_properties.json b/system_test/replication_testsuite/testcase_0251/testcase_0251_properties.json index 76ea6775657c9..9f06f3072af84 100644 --- a/system_test/replication_testsuite/testcase_0251/testcase_0251_properties.json +++ b/system_test/replication_testsuite/testcase_0251/testcase_0251_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0252/testcase_0252_properties.json b/system_test/replication_testsuite/testcase_0252/testcase_0252_properties.json index 672b3ff883f27..c264fca3876d4 100644 --- a/system_test/replication_testsuite/testcase_0252/testcase_0252_properties.json +++ b/system_test/replication_testsuite/testcase_0252/testcase_0252_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0253/testcase_0253_properties.json b/system_test/replication_testsuite/testcase_0253/testcase_0253_properties.json index 19bd096b83735..e5fdb2eab95d6 100644 --- a/system_test/replication_testsuite/testcase_0253/testcase_0253_properties.json +++ b/system_test/replication_testsuite/testcase_0253/testcase_0253_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0254/testcase_0254_properties.json b/system_test/replication_testsuite/testcase_0254/testcase_0254_properties.json index 8d6add098da11..27ce4e97ec01b 100644 --- a/system_test/replication_testsuite/testcase_0254/testcase_0254_properties.json +++ b/system_test/replication_testsuite/testcase_0254/testcase_0254_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0255/testcase_0255_properties.json b/system_test/replication_testsuite/testcase_0255/testcase_0255_properties.json index 2a7e7776214d6..1148a4546b48a 100644 --- a/system_test/replication_testsuite/testcase_0255/testcase_0255_properties.json +++ b/system_test/replication_testsuite/testcase_0255/testcase_0255_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": 
"test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0256/testcase_0256_properties.json b/system_test/replication_testsuite/testcase_0256/testcase_0256_properties.json index 91fbc9da75e29..1b58e9bd9bf13 100644 --- a/system_test/replication_testsuite/testcase_0256/testcase_0256_properties.json +++ b/system_test/replication_testsuite/testcase_0256/testcase_0256_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0257/testcase_0257_properties.json b/system_test/replication_testsuite/testcase_0257/testcase_0257_properties.json index 5594a9bc76d1c..42e33c265a769 100644 --- a/system_test/replication_testsuite/testcase_0257/testcase_0257_properties.json +++ b/system_test/replication_testsuite/testcase_0257/testcase_0257_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0258/testcase_0258_properties.json b/system_test/replication_testsuite/testcase_0258/testcase_0258_properties.json index 6eabd3772dbe6..ae9ce5eb72d7c 100644 --- a/system_test/replication_testsuite/testcase_0258/testcase_0258_properties.json +++ b/system_test/replication_testsuite/testcase_0258/testcase_0258_properties.json @@ -64,6 +64,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0259/testcase_0259_properties.json b/system_test/replication_testsuite/testcase_0259/testcase_0259_properties.json index 3bbeeb10adfd8..7278327226412 100644 --- a/system_test/replication_testsuite/testcase_0259/testcase_0259_properties.json +++ b/system_test/replication_testsuite/testcase_0259/testcase_0259_properties.json @@ -65,6 +65,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0301/testcase_0301_properties.json b/system_test/replication_testsuite/testcase_0301/testcase_0301_properties.json index 496340c574ec3..f9b775e04b2ec 100644 --- a/system_test/replication_testsuite/testcase_0301/testcase_0301_properties.json +++ b/system_test/replication_testsuite/testcase_0301/testcase_0301_properties.json @@ -62,6 +62,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0302/testcase_0302_properties.json b/system_test/replication_testsuite/testcase_0302/testcase_0302_properties.json index beabc86e2cdba..af468c3a733bb 100644 --- a/system_test/replication_testsuite/testcase_0302/testcase_0302_properties.json +++ b/system_test/replication_testsuite/testcase_0302/testcase_0302_properties.json @@ -62,6 +62,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0303/testcase_0303_properties.json b/system_test/replication_testsuite/testcase_0303/testcase_0303_properties.json index 0e8f5a45895a3..374ff9e868524 100644 --- a/system_test/replication_testsuite/testcase_0303/testcase_0303_properties.json +++ b/system_test/replication_testsuite/testcase_0303/testcase_0303_properties.json @@ -62,6 +62,7 @@ }, { "entity_id": "4", + "new-producer":"true", 
"topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0304/testcase_0304_properties.json b/system_test/replication_testsuite/testcase_0304/testcase_0304_properties.json index 98a385556c781..1b0f2eea4cc68 100644 --- a/system_test/replication_testsuite/testcase_0304/testcase_0304_properties.json +++ b/system_test/replication_testsuite/testcase_0304/testcase_0304_properties.json @@ -62,6 +62,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_0305/testcase_0305_properties.json b/system_test/replication_testsuite/testcase_0305/testcase_0305_properties.json index e09ac9c09b971..568de4bb3b734 100644 --- a/system_test/replication_testsuite/testcase_0305/testcase_0305_properties.json +++ b/system_test/replication_testsuite/testcase_0305/testcase_0305_properties.json @@ -62,6 +62,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0306/testcase_0306_properties.json b/system_test/replication_testsuite/testcase_0306/testcase_0306_properties.json index bcfe91b08c7e2..ab933380abbfe 100644 --- a/system_test/replication_testsuite/testcase_0306/testcase_0306_properties.json +++ b/system_test/replication_testsuite/testcase_0306/testcase_0306_properties.json @@ -62,6 +62,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0307/testcase_0307_properties.json b/system_test/replication_testsuite/testcase_0307/testcase_0307_properties.json index da07a9e4751b5..06b0623841149 100644 --- a/system_test/replication_testsuite/testcase_0307/testcase_0307_properties.json +++ b/system_test/replication_testsuite/testcase_0307/testcase_0307_properties.json @@ -62,6 +62,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0308/testcase_0308_properties.json b/system_test/replication_testsuite/testcase_0308/testcase_0308_properties.json index 0fecafc1a49c6..0fda7c615490f 100644 --- a/system_test/replication_testsuite/testcase_0308/testcase_0308_properties.json +++ b/system_test/replication_testsuite/testcase_0308/testcase_0308_properties.json @@ -62,6 +62,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_0309/testcase_0309_properties.json b/system_test/replication_testsuite/testcase_0309/testcase_0309_properties.json index 90bd40442cc95..2879c8ff9bbcc 100644 --- a/system_test/replication_testsuite/testcase_0309/testcase_0309_properties.json +++ b/system_test/replication_testsuite/testcase_0309/testcase_0309_properties.json @@ -63,6 +63,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_1/testcase_1_properties.json b/system_test/replication_testsuite/testcase_1/testcase_1_properties.json index 7e1b3fbf2d22b..0c6d7a316cc6b 100644 --- a/system_test/replication_testsuite/testcase_1/testcase_1_properties.json +++ b/system_test/replication_testsuite/testcase_1/testcase_1_properties.json @@ -8,7 +8,7 @@ "07":"At the end it verifies the log size and contents", "08":"Use a 
consumer to verify no message loss.", "09":"Producer dimensions : mode:sync, acks:-1, comp:0", - "10":"Log segment size : 10240" + "10":"Log segment size : 20480" }, "testcase_args": { "broker_type": "leader", @@ -32,7 +32,7 @@ "entity_id": "1", "port": "9091", "broker.id": "1", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_1_logs", "log_filename": "kafka_server_9091.log", "config_filename": "kafka_server_9091.properties" @@ -41,7 +41,7 @@ "entity_id": "2", "port": "9092", "broker.id": "2", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_2_logs", "log_filename": "kafka_server_9092.log", "config_filename": "kafka_server_9092.properties" @@ -50,13 +50,14 @@ "entity_id": "3", "port": "9093", "broker.id": "3", - "log.segment.bytes": "10240", + "log.segment.bytes": "20480", "log.dir": "/tmp/kafka_server_3_logs", "log_filename": "kafka_server_9093.log", "config_filename": "kafka_server_9093.properties" }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_10101/testcase_10101_properties.json b/system_test/replication_testsuite/testcase_10101/testcase_10101_properties.json new file mode 100644 index 0000000000000..3f8e5870799b7 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10101/testcase_10101_properties.json @@ -0,0 +1,86 @@ +{ + "description": {"01":"Leader Failure in Replication : Base Test", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:-1, comp:0", + "10":"Log segment size : 20480" + }, + "testcase_args": { + "broker_type": "leader", + "bounce_broker": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + 
"compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "sync":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10102/testcase_10102_properties.json b/system_test/replication_testsuite/testcase_10102/testcase_10102_properties.json new file mode 100644 index 0000000000000..c96352d5e18e6 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10102/testcase_10102_properties.json @@ -0,0 +1,86 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. comp => 1", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:-1, comp:1", + "10":"Log segment size : 20480" + }, + "testcase_args": { + "broker_type": "leader", + "bounce_broker": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "sync":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10103/testcase_10103_properties.json b/system_test/replication_testsuite/testcase_10103/testcase_10103_properties.json new file mode 
100644 index 0000000000000..55fa39e148818 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10103/testcase_10103_properties.json @@ -0,0 +1,86 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. acks => 1; 2. comp => 1", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:1, comp:1", + "10":"Log segment size : 20480" + }, + "testcase_args": { + "broker_type": "leader", + "bounce_broker": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "sync":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10104/testcase_10104_properties.json b/system_test/replication_testsuite/testcase_10104/testcase_10104_properties.json new file mode 100644 index 0000000000000..15827eb994ae8 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10104/testcase_10104_properties.json @@ -0,0 +1,86 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. mode => async; 2. 
comp => 0", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:-1, comp:1", + "10":"Log segment size : 20480" + }, + "testcase_args": { + "broker_type": "leader", + "bounce_broker": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "sync":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10105/testcase_10105_properties.json b/system_test/replication_testsuite/testcase_10105/testcase_10105_properties.json new file mode 100644 index 0000000000000..d1fa1ade07c53 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10105/testcase_10105_properties.json @@ -0,0 +1,86 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. mode => async; 2. acks => 1; 3. 
comp => 1", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:1, comp:1", + "10":"Log segment size : 20480" + }, + "testcase_args": { + "broker_type": "leader", + "bounce_broker": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "3", + "num.partitions": "1", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "sync":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10106/testcase_10106_properties.json b/system_test/replication_testsuite/testcase_10106/testcase_10106_properties.json new file mode 100644 index 0000000000000..675c76f656efa --- /dev/null +++ b/system_test/replication_testsuite/testcase_10106/testcase_10106_properties.json @@ -0,0 +1,86 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. comp => 1; 2. 
no. of partitions => 3", + "02":"Produce and consume messages to a single topic - 3 partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:-1, comp:1", + "10":"Log segment size : 20480" + }, + "testcase_args": { + "broker_type": "leader", + "bounce_broker": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "sync":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10107/testcase_10107_properties.json b/system_test/replication_testsuite/testcase_10107/testcase_10107_properties.json new file mode 100644 index 0000000000000..afc221cdc8bf7 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10107/testcase_10107_properties.json @@ -0,0 +1,86 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. mode => async; 2. comp => 1; 3. 
no. of partitions => 3", + "02":"Produce and consume messages to a single topic - 3 partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:-1, comp:1", + "10":"Log segment size : 20480" + }, + "testcase_args": { + "broker_type": "leader", + "bounce_broker": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "sync":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10108/testcase_10108_properties.json b/system_test/replication_testsuite/testcase_10108/testcase_10108_properties.json new file mode 100644 index 0000000000000..5df72f3d52a6c --- /dev/null +++ b/system_test/replication_testsuite/testcase_10108/testcase_10108_properties.json @@ -0,0 +1,86 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. acks => 1; 2. comp => 1; 3. no. 
of partitions => 3", + "02":"Produce and consume messages to a single topic - 3 partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:1, comp:1", + "10":"Log segment size : 20480" + }, + "testcase_args": { + "broker_type": "leader", + "bounce_broker": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "sync":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10109/testcase_10109_properties.json b/system_test/replication_testsuite/testcase_10109/testcase_10109_properties.json new file mode 100644 index 0000000000000..9b156681ec485 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10109/testcase_10109_properties.json @@ -0,0 +1,86 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. mode => async; 2. acks => 1; 3. comp => 1; 4. no. 
of partitions => 3", + "02":"Produce and consume messages to a single topic - 3 partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:1, comp:1", + "10":"Log segment size : 20480" + }, + "testcase_args": { + "broker_type": "leader", + "bounce_broker": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "20480", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "sync":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10110/testcase_10110_properties.json b/system_test/replication_testsuite/testcase_10110/testcase_10110_properties.json new file mode 100644 index 0000000000000..f51abc156ae63 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10110/testcase_10110_properties.json @@ -0,0 +1,86 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. mode => async; 2. acks => 1; 3. comp => 1; 4. no. of partitions => 3; 5. 
log segment size => 1M", + "02":"Produce and consume messages to a single topic - 3 partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:1, comp:1", + "10":"Log segment size : 1048576 (1M)" + }, + "testcase_args": { + "broker_type": "leader", + "bounce_broker": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "1048576", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "1048576", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "1048576", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "3", + "num.partitions": "3", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "sync":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10131/cluster_config.json b/system_test/replication_testsuite/testcase_10131/cluster_config.json new file mode 100644 index 0000000000000..cf147eb3f2024 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10131/cluster_config.json @@ -0,0 +1,76 @@ +{ + "cluster_config": [ + { + "entity_id": "0", + "hostname": "localhost", + "role": "zookeeper", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9990" + }, + { + "entity_id": "1", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9991" + }, + { + "entity_id": "2", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9992" + }, + { + "entity_id": "3", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": 
"9993" + }, + { + "entity_id": "4", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9997" + }, + { + "entity_id": "5", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9998" + }, + { + "entity_id": "6", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9999" + }, + { + "entity_id": "7", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9099" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10131/testcase_10131_properties.json b/system_test/replication_testsuite/testcase_10131/testcase_10131_properties.json new file mode 100644 index 0000000000000..a140882287b9c --- /dev/null +++ b/system_test/replication_testsuite/testcase_10131/testcase_10131_properties.json @@ -0,0 +1,110 @@ +{ + "description": {"01":"Leader Failure in Replication with multi topics & partitions : Base Test", + "02":"Produce and consume messages to 2 topics - 3 partitions", + "03":"This test sends messages to 2 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:-1, comp:0", + "10":"Log segment size : 102400" + }, + "testcase_args": { + "broker_type": "leader", + "auto_create_topic": "true", + "bounce_broker": "true", + "replica_factor": "2", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "102400", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "2", + "num.partitions": "3", + "log.index.interval.bytes": "10", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "102400", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "2", + "num.partitions": "3", + "log.index.interval.bytes": "10", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "102400", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "2", + "num.partitions": "3", + "log.index.interval.bytes": "10", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "producer-retry-backoff-ms": "300", + "sync":"true", + "log_filename": "producer_performance_4.log", + "config_filename": 
"producer_performance_4.properties" + }, + { + "entity_id": "5", + "topic": "test_2", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "producer-retry-backoff-ms": "300", + "sync":"true", + "log_filename": "producer_performance_5.log", + "config_filename": "producer_performance_5.properties" + }, + { + "entity_id": "6", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_6.log", + "config_filename": "console_consumer_6.properties" + }, + { + "entity_id": "7", + "topic": "test_2", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_7.log", + "config_filename": "console_consumer_7.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10132/cluster_config.json b/system_test/replication_testsuite/testcase_10132/cluster_config.json new file mode 100644 index 0000000000000..cf147eb3f2024 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10132/cluster_config.json @@ -0,0 +1,76 @@ +{ + "cluster_config": [ + { + "entity_id": "0", + "hostname": "localhost", + "role": "zookeeper", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9990" + }, + { + "entity_id": "1", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9991" + }, + { + "entity_id": "2", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9992" + }, + { + "entity_id": "3", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9993" + }, + { + "entity_id": "4", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9997" + }, + { + "entity_id": "5", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9998" + }, + { + "entity_id": "6", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9999" + }, + { + "entity_id": "7", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9099" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10132/testcase_10132_properties.json b/system_test/replication_testsuite/testcase_10132/testcase_10132_properties.json new file mode 100644 index 0000000000000..48b30c7e014f0 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10132/testcase_10132_properties.json @@ -0,0 +1,107 @@ +{ + "description": {"01":"Leader Failure in Replication with multi topics & partitions : 1. 
acks => 1", + "02":"Produce and consume messages to 2 topics - 3 partitions", + "03":"This test sends messages to 2 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:1, comp:0", + "10":"Log segment size : 512000" + }, + "testcase_args": { + "broker_type": "leader", + "auto_create_topic": "true", + "bounce_broker": "true", + "replica_factor": "2", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "512000", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "2", + "num.partitions": "3", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "512000", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "2", + "num.partitions": "3", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "512000", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "2", + "num.partitions": "3", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "producer-retry-backoff-ms": "300", + "sync":"true", + "log_filename": "producer_performance_4.log", + "config_filename": "producer_performance_4.properties" + }, + { + "entity_id": "5", + "topic": "test_2", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "producer-retry-backoff-ms": "300", + "sync":"true", + "log_filename": "producer_performance_5.log", + "config_filename": "producer_performance_5.properties" + }, + { + "entity_id": "6", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_6.log", + "config_filename": "console_consumer_6.properties" + }, + { + "entity_id": "7", + "topic": "test_2", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_7.log", + "config_filename": "console_consumer_7.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10133/cluster_config.json b/system_test/replication_testsuite/testcase_10133/cluster_config.json new file mode 100644 index 0000000000000..cf147eb3f2024 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10133/cluster_config.json @@ -0,0 +1,76 @@ +{ + "cluster_config": [ + { + "entity_id": "0", + "hostname": "localhost", + "role": "zookeeper", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9990" + }, + { + "entity_id": "1", + 
"hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9991" + }, + { + "entity_id": "2", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9992" + }, + { + "entity_id": "3", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9993" + }, + { + "entity_id": "4", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9997" + }, + { + "entity_id": "5", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9998" + }, + { + "entity_id": "6", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9999" + }, + { + "entity_id": "7", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9099" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10133/testcase_10133_properties.json b/system_test/replication_testsuite/testcase_10133/testcase_10133_properties.json new file mode 100644 index 0000000000000..8276aae0aa822 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10133/testcase_10133_properties.json @@ -0,0 +1,107 @@ +{ + "description": {"01":"Leader Failure in Replication with multi topics & partitions : 1. mode => async; 2. comp => 0", + "02":"Produce and consume messages to 2 topics - 3 partitions", + "03":"This test sends messages to 2 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:1, comp:1", + "10":"Log segment size : 512000" + }, + "testcase_args": { + "broker_type": "leader", + "auto_create_topic": "true", + "bounce_broker": "true", + "replica_factor": "2", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "log.segment.bytes": "512000", + "log.dir": "/tmp/kafka_server_1_logs", + "default.replication.factor": "2", + "num.partitions": "3", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "log.segment.bytes": "512000", + "log.dir": "/tmp/kafka_server_2_logs", + "default.replication.factor": "2", + "num.partitions": "3", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "log.segment.bytes": "512000", + "log.dir": "/tmp/kafka_server_3_logs", + "default.replication.factor": "2", + 
"num.partitions": "3", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "producer-retry-backoff-ms": "300", + "sync":"false", + "log_filename": "producer_performance_4.log", + "config_filename": "producer_performance_4.properties" + }, + { + "entity_id": "5", + "topic": "test_2", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "producer-retry-backoff-ms": "300", + "sync":"false", + "log_filename": "producer_performance_5.log", + "config_filename": "producer_performance_5.properties" + }, + { + "entity_id": "6", + "topic": "test_1", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_6.log", + "config_filename": "console_consumer_6.properties" + }, + { + "entity_id": "7", + "topic": "test_2", + "group.id": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_7.log", + "config_filename": "console_consumer_7.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_10134/testcase_10134_properties.json b/system_test/replication_testsuite/testcase_10134/testcase_10134_properties.json new file mode 100644 index 0000000000000..73bb859952262 --- /dev/null +++ b/system_test/replication_testsuite/testcase_10134/testcase_10134_properties.json @@ -0,0 +1,92 @@ +{ + "description": {"01":"Leader Failure in Replication with multi topics & partitions : 1. auto_create_topic => true", + "02":"Produce and consume messages to 2 topics - 3 partitions", + "03":"This test sends messages to 2 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:-1, comp:0", + "10":"Log segment size : 102400" + }, + "testcase_args": { + "broker_type": "leader", + "bounce_broker": "true", + "replica_factor": "2", + "num_partition": "3", + "num_iteration": "3", + "auto_create_topic": "true", + "producer_multi_topics_mode": "true", + "consumer_multi_topics_mode": "true", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "broker.id": "1", + "num.partitions": "3", + "default.replication.factor": "2", + "log.segment.bytes": "102400", + "log.dir": "/tmp/kafka_server_1_logs", + "log.index.interval.bytes": "10", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "broker.id": "2", + "num.partitions": "3", + "default.replication.factor": "2", + "log.segment.bytes": "102400", + "log.dir": "/tmp/kafka_server_2_logs", + "log.index.interval.bytes": "10", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "broker.id": "3", + "num.partitions": "3", + 
"default.replication.factor": "2", + "log.segment.bytes": "102400", + "log.dir": "/tmp/kafka_server_3_logs", + "log.index.interval.bytes": "10", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1,test_2", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "producer-retry-backoff-ms": "3500", + "producer-num-retries": "3", + "sync":"false", + "log_filename": "producer_performance_4.log", + "config_filename": "producer_performance_4.properties" + }, + { + "entity_id": "5", + "topic": "test_1,test_2", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_5.log", + "config_filename": "console_consumer_5.properties" + } + ] +} diff --git a/system_test/replication_testsuite/testcase_4001/testcase_4001_properties.json b/system_test/replication_testsuite/testcase_4001/testcase_4001_properties.json index d2ffd952416e7..2652f16b78481 100644 --- a/system_test/replication_testsuite/testcase_4001/testcase_4001_properties.json +++ b/system_test/replication_testsuite/testcase_4001/testcase_4001_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_4002/testcase_4002_properties.json b/system_test/replication_testsuite/testcase_4002/testcase_4002_properties.json index c86525db84ad0..87245971271bf 100644 --- a/system_test/replication_testsuite/testcase_4002/testcase_4002_properties.json +++ b/system_test/replication_testsuite/testcase_4002/testcase_4002_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_4003/testcase_4003_properties.json b/system_test/replication_testsuite/testcase_4003/testcase_4003_properties.json index b77e4fdf40017..4e3b6f56281d6 100644 --- a/system_test/replication_testsuite/testcase_4003/testcase_4003_properties.json +++ b/system_test/replication_testsuite/testcase_4003/testcase_4003_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_4004/testcase_4004_properties.json b/system_test/replication_testsuite/testcase_4004/testcase_4004_properties.json index e753327795162..f8718a648ab30 100644 --- a/system_test/replication_testsuite/testcase_4004/testcase_4004_properties.json +++ b/system_test/replication_testsuite/testcase_4004/testcase_4004_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_4005/testcase_4005_properties.json b/system_test/replication_testsuite/testcase_4005/testcase_4005_properties.json index 
5468401c6b41d..af96c7b3bdaf6 100644 --- a/system_test/replication_testsuite/testcase_4005/testcase_4005_properties.json +++ b/system_test/replication_testsuite/testcase_4005/testcase_4005_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_4006/testcase_4006_properties.json b/system_test/replication_testsuite/testcase_4006/testcase_4006_properties.json index e5ab0a0e86981..e132236ff3bd2 100644 --- a/system_test/replication_testsuite/testcase_4006/testcase_4006_properties.json +++ b/system_test/replication_testsuite/testcase_4006/testcase_4006_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_4007/testcase_4007_properties.json b/system_test/replication_testsuite/testcase_4007/testcase_4007_properties.json index 7aa6e9a72ca74..5c4e5bbfd7da4 100644 --- a/system_test/replication_testsuite/testcase_4007/testcase_4007_properties.json +++ b/system_test/replication_testsuite/testcase_4007/testcase_4007_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_4008/testcase_4008_properties.json b/system_test/replication_testsuite/testcase_4008/testcase_4008_properties.json index 08aa108f4f43a..8dce9b2b3883c 100644 --- a/system_test/replication_testsuite/testcase_4008/testcase_4008_properties.json +++ b/system_test/replication_testsuite/testcase_4008/testcase_4008_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_4011/testcase_4011_properties.json b/system_test/replication_testsuite/testcase_4011/testcase_4011_properties.json index 512fafb949c0d..c6f1d1c15f832 100644 --- a/system_test/replication_testsuite/testcase_4011/testcase_4011_properties.json +++ b/system_test/replication_testsuite/testcase_4011/testcase_4011_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_4012/testcase_4012_properties.json b/system_test/replication_testsuite/testcase_4012/testcase_4012_properties.json index 9b711af013994..bc1ff6397baa5 100644 --- a/system_test/replication_testsuite/testcase_4012/testcase_4012_properties.json +++ b/system_test/replication_testsuite/testcase_4012/testcase_4012_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + 
"new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_4013/testcase_4013_properties.json b/system_test/replication_testsuite/testcase_4013/testcase_4013_properties.json index 3836366b3b19e..aa48a6861a7e0 100644 --- a/system_test/replication_testsuite/testcase_4013/testcase_4013_properties.json +++ b/system_test/replication_testsuite/testcase_4013/testcase_4013_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_4014/testcase_4014_properties.json b/system_test/replication_testsuite/testcase_4014/testcase_4014_properties.json index 86ab75a46a48e..7acf8b68514ea 100644 --- a/system_test/replication_testsuite/testcase_4014/testcase_4014_properties.json +++ b/system_test/replication_testsuite/testcase_4014/testcase_4014_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_4015/testcase_4015_properties.json b/system_test/replication_testsuite/testcase_4015/testcase_4015_properties.json index 82d51b6072c6d..7841273a3876f 100644 --- a/system_test/replication_testsuite/testcase_4015/testcase_4015_properties.json +++ b/system_test/replication_testsuite/testcase_4015/testcase_4015_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_4016/testcase_4016_properties.json b/system_test/replication_testsuite/testcase_4016/testcase_4016_properties.json index 31c1be0d73393..0519d273dcf34 100644 --- a/system_test/replication_testsuite/testcase_4016/testcase_4016_properties.json +++ b/system_test/replication_testsuite/testcase_4016/testcase_4016_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "0", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "0", diff --git a/system_test/replication_testsuite/testcase_4017/testcase_4017_properties.json b/system_test/replication_testsuite/testcase_4017/testcase_4017_properties.json index 72f78b074243f..c29077bfd5dc1 100644 --- a/system_test/replication_testsuite/testcase_4017/testcase_4017_properties.json +++ b/system_test/replication_testsuite/testcase_4017/testcase_4017_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_4018/testcase_4018_properties.json b/system_test/replication_testsuite/testcase_4018/testcase_4018_properties.json index ee459f4e35d68..ab57e5ab18703 100644 --- 
a/system_test/replication_testsuite/testcase_4018/testcase_4018_properties.json +++ b/system_test/replication_testsuite/testcase_4018/testcase_4018_properties.json @@ -67,6 +67,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "test_1", "threads": "5", "compression-codec": "1", @@ -80,6 +81,7 @@ }, { "entity_id": "5", + "new-producer":"true", "topic": "test_2", "threads": "5", "compression-codec": "1", diff --git a/system_test/replication_testsuite/testcase_9051/testcase_9051_properties.json b/system_test/replication_testsuite/testcase_9051/testcase_9051_properties.json index 958eef7df0be1..e959aeda4fabe 100644 --- a/system_test/replication_testsuite/testcase_9051/testcase_9051_properties.json +++ b/system_test/replication_testsuite/testcase_9051/testcase_9051_properties.json @@ -60,6 +60,7 @@ }, { "entity_id": "4", + "new-producer":"true", "topic": "t001", "threads": "5", "compression-codec": "0", diff --git a/system_test/system_test_runner.py b/system_test/system_test_runner.py index ee7aa25233355..5078d4479fab7 100644 --- a/system_test/system_test_runner.py +++ b/system_test/system_test_runner.py @@ -199,6 +199,6 @@ def main(): # main entry point # ========================= -main() +sys.exit(main()) diff --git a/system_test/testcase_to_run.json b/system_test/testcase_to_run.json index 8252860bcd734..c6cf17ea69073 100644 --- a/system_test/testcase_to_run.json +++ b/system_test/testcase_to_run.json @@ -1,5 +1,5 @@ { "ReplicaBasicTest" : [ - "testcase_0001" + "testcase_1" ] } diff --git a/system_test/testcase_to_run_all.json b/system_test/testcase_to_run_all.json index 182160c6ea347..3e80a1f79a63c 100644 --- a/system_test/testcase_to_run_all.json +++ b/system_test/testcase_to_run_all.json @@ -26,6 +26,17 @@ "testcase_0109", "testcase_0110", + "testcase_10101", + "testcase_10102", + "testcase_10103", + "testcase_10104", + "testcase_10105", + "testcase_10106", + "testcase_10107", + "testcase_10108", + "testcase_10109", + "testcase_10110", + "testcase_0111", "testcase_0112", "testcase_0113", @@ -46,6 +57,12 @@ "testcase_0131", "testcase_0132", "testcase_0133", + "testcase_0134", + + "testcase_10131", + "testcase_10132", + "testcase_10133", + "testcase_10134", "testcase_0151", "testcase_0152", @@ -104,20 +121,19 @@ "testcase_9051" ], - "MigrationToolTest" : [ - "testcase_9001", - "testcase_9003", - "testcase_9004", - "testcase_9005", - "testcase_9006" - ], - "MirrorMakerTest" : [ "testcase_5001", "testcase_5002", "testcase_5003", "testcase_5004", "testcase_5005", - "testcase_5006" + "testcase_5006", + + "testcase_15001", + "testcase_15002", + "testcase_15003", + "testcase_15004", + "testcase_15005", + "testcase_15006" ] } diff --git a/system_test/utils/kafka_system_test_utils.py b/system_test/utils/kafka_system_test_utils.py index fb4a9c05bf6f3..41d511cbc310f 100644 --- a/system_test/utils/kafka_system_test_utils.py +++ b/system_test/utils/kafka_system_test_utils.py @@ -117,7 +117,7 @@ def generate_testcase_log_dirs(systemTestEnv, testcaseEnv): # create the role directory under dashboards dashboardsRoleDir = dashboardsPathName + "/" + role if not os.path.exists(dashboardsRoleDir) : os.makedirs(dashboardsRoleDir) - + def collect_logs_from_remote_hosts(systemTestEnv, testcaseEnv): anonLogger.info("================================================") @@ -212,7 +212,7 @@ def collect_logs_from_remote_hosts(systemTestEnv, testcaseEnv): logger.debug("executing command [" + cmdStr + "]", extra=d) system_test_utils.sys_call(cmdStr) - + def 
generate_testcase_log_dirs_in_remote_hosts(systemTestEnv, testcaseEnv): testCaseBaseDir = testcaseEnv.testCaseBaseDir @@ -432,9 +432,9 @@ def generate_overriden_props_files(testsuitePathname, testcaseEnv, systemTestEnv sys.exit(1) addedCSVConfig = {} - addedCSVConfig["kafka.csv.metrics.dir"] = get_testcase_config_log_dir_pathname(testcaseEnv, "broker", clusterCfg["entity_id"], "metrics") - addedCSVConfig["kafka.metrics.polling.interval.secs"] = "5" - addedCSVConfig["kafka.metrics.reporters"] = "kafka.metrics.KafkaCSVMetricsReporter" + addedCSVConfig["kafka.csv.metrics.dir"] = get_testcase_config_log_dir_pathname(testcaseEnv, "broker", clusterCfg["entity_id"], "metrics") + addedCSVConfig["kafka.metrics.polling.interval.secs"] = "5" + addedCSVConfig["kafka.metrics.reporters"] = "kafka.metrics.KafkaCSVMetricsReporter" addedCSVConfig["kafka.csv.metrics.reporter.enabled"] = "true" if brokerVersion == "0.7": @@ -458,6 +458,7 @@ def generate_overriden_props_files(testsuitePathname, testcaseEnv, systemTestEnv elif ( clusterCfg["role"] == "mirror_maker"): tcCfg["metadata.broker.list"] = testcaseEnv.userDefinedEnvVarDict["targetBrokerList"] + tcCfg["bootstrap.servers"] = testcaseEnv.userDefinedEnvVarDict["targetBrokerList"] # for new producer copy_file_with_dict_values(cfgTemplatePathname + "/mirror_producer.properties", cfgDestPathname + "/" + tcCfg["mirror_producer_config_filename"], tcCfg, None) @@ -465,7 +466,7 @@ def generate_overriden_props_files(testsuitePathname, testcaseEnv, systemTestEnv tcCfg["zookeeper.connect"] = testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"] copy_file_with_dict_values(cfgTemplatePathname + "/mirror_consumer.properties", cfgDestPathname + "/" + tcCfg["mirror_consumer_config_filename"], tcCfg, None) - + else: logger.debug("UNHANDLED role " + clusterCfg["role"], extra=d) @@ -494,7 +495,7 @@ def scp_file_to_remote_host(clusterEntityConfigDictList, testcaseEnv): def start_zookeepers(systemTestEnv, testcaseEnv): clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList - zkEntityIdList = system_test_utils.get_data_from_list_of_dicts( + zkEntityIdList = system_test_utils.get_data_from_list_of_dicts( clusterEntityConfigDictList, "role", "zookeeper", "entity_id") for zkEntityId in zkEntityIdList: @@ -533,12 +534,23 @@ def start_zookeepers(systemTestEnv, testcaseEnv): def start_brokers(systemTestEnv, testcaseEnv): clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList - brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts( + brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts( clusterEntityConfigDictList, "role", "broker", "entity_id") for brokerEntityId in brokerEntityIdList: start_entity_in_background(systemTestEnv, testcaseEnv, brokerEntityId) +def start_console_consumers(systemTestEnv, testcaseEnv, onlyThisEntityId=None): + + if onlyThisEntityId is not None: + start_entity_in_background(systemTestEnv, testcaseEnv, onlyThisEntityId) + else: + clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList + consoleConsumerEntityIdList = system_test_utils.get_data_from_list_of_dicts( + clusterEntityConfigDictList, "role", "console_consumer", "entity_id") + for entityId in consoleConsumerEntityIdList: + start_entity_in_background(systemTestEnv, testcaseEnv, entityId) + def start_mirror_makers(systemTestEnv, testcaseEnv, onlyThisEntityId=None): @@ -546,7 +558,7 @@ def start_mirror_makers(systemTestEnv, testcaseEnv, onlyThisEntityId=None): start_entity_in_background(systemTestEnv, testcaseEnv, 
onlyThisEntityId) else: clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList - brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts( + brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts( clusterEntityConfigDictList, "role", "mirror_maker", "entity_id") for brokerEntityId in brokerEntityIdList: @@ -559,17 +571,17 @@ def get_broker_shutdown_log_line(systemTestEnv, testcaseEnv, leaderAttributesDic # keep track of broker related data in this dict such as broker id, # entity id and timestamp and return it to the caller function - shutdownBrokerDict = {} + shutdownBrokerDict = {} clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList - brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts( + brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts( clusterEntityConfigDictList, "role", "broker", "entity_id") for brokerEntityId in brokerEntityIdList: - hostname = system_test_utils.get_data_by_lookup_keyval( + hostname = system_test_utils.get_data_by_lookup_keyval( clusterEntityConfigDictList, "entity_id", brokerEntityId, "hostname") - logFile = system_test_utils.get_data_by_lookup_keyval( + logFile = system_test_utils.get_data_by_lookup_keyval( testcaseEnv.testcaseConfigsList, "entity_id", brokerEntityId, "log_filename") logPathName = get_testcase_config_log_dir_pathname(testcaseEnv, "broker", brokerEntityId, "default") @@ -617,7 +629,7 @@ def get_leader_elected_log_line(systemTestEnv, testcaseEnv, leaderAttributesDict # keep track of leader related data in this dict such as broker id, # entity id and timestamp and return it to the caller function - leaderDict = {} + leaderDict = {} clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts( \ @@ -696,6 +708,7 @@ def start_entity_in_background(systemTestEnv, testcaseEnv, entityId): configFile = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "config_filename") logFile = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "log_filename") + useNewProducer = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "new-producer") mmConsumerConfigFile = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "mirror_consumer_config_filename") mmProducerConfigFile = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, @@ -722,27 +735,107 @@ def start_entity_in_background(systemTestEnv, testcaseEnv, entityId): elif role == "broker": cmdList = ["ssh " + hostname, "'JAVA_HOME=" + javaHome, - "JMX_PORT=" + jmxPort, + "JMX_PORT=" + jmxPort, + "KAFKA_LOG4J_OPTS=-Dlog4j.configuration=file:%s/config/log4j.properties" % kafkaHome, kafkaHome + "/bin/kafka-run-class.sh kafka.Kafka", configPathName + "/" + configFile + " >> ", logPathName + "/" + logFile + " & echo pid:$! > ", logPathName + "/entity_" + entityId + "_pid'"] elif role == "mirror_maker": + if useNewProducer.lower() == "true": + cmdList = ["ssh " + hostname, + "'JAVA_HOME=" + javaHome, + "JMX_PORT=" + jmxPort, + kafkaHome + "/bin/kafka-run-class.sh kafka.tools.MirrorMaker", + "--consumer.config " + configPathName + "/" + mmConsumerConfigFile, + "--producer.config " + configPathName + "/" + mmProducerConfigFile, + "--new.producer", + "--whitelist=\".*\" >> ", + logPathName + "/" + logFile + " & echo pid:$! 
> ", + logPathName + "/entity_" + entityId + "_pid'"] + else: + cmdList = ["ssh " + hostname, + "'JAVA_HOME=" + javaHome, + "JMX_PORT=" + jmxPort, + kafkaHome + "/bin/kafka-run-class.sh kafka.tools.MirrorMaker", + "--consumer.config " + configPathName + "/" + mmConsumerConfigFile, + "--producer.config " + configPathName + "/" + mmProducerConfigFile, + "--whitelist=\".*\" >> ", + logPathName + "/" + logFile + " & echo pid:$! > ", + logPathName + "/entity_" + entityId + "_pid'"] + + elif role == "console_consumer": + clusterToConsumeFrom = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "cluster_name") + numTopicsForAutoGenString = -1 + try: + numTopicsForAutoGenString = int(testcaseEnv.testcaseArgumentsDict["num_topics_for_auto_generated_string"]) + except: + pass + + topic = "" + if numTopicsForAutoGenString < 0: + topic = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "topic") + else: + topic = generate_topics_string("topic", numTopicsForAutoGenString) + + # update this variable and will be used by data validation functions + testcaseEnv.consumerTopicsString = topic + + # 2. consumer timeout + timeoutMs = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "consumer-timeout-ms") + + # 3. consumer formatter + formatterOption = "" + try: + formatterOption = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "formatter") + except: + pass + + # 4. consumer config + consumerProperties = {} + consumerProperties["consumer.timeout.ms"] = timeoutMs + try: + groupOption = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "group.id") + consumerProperties["group.id"] = groupOption + except: + pass + + props_file_path=write_consumer_properties(consumerProperties) + scpCmdStr = "scp "+ props_file_path +" "+ hostname + ":/tmp/" + logger.debug("executing command [" + scpCmdStr + "]", extra=d) + system_test_utils.sys_call(scpCmdStr) + + if len(formatterOption) > 0: + formatterOption = " --formatter " + formatterOption + " " + + # get zookeeper connect string + zkConnectStr = "" + if clusterName == "source": + zkConnectStr = testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"] + elif clusterName == "target": + zkConnectStr = testcaseEnv.userDefinedEnvVarDict["targetZkConnectStr"] + else: + logger.error("Invalid cluster name : " + clusterName, extra=d) + sys.exit(1) cmdList = ["ssh " + hostname, - "'JAVA_HOME=" + javaHome, - "JMX_PORT=" + jmxPort, - kafkaHome + "/bin/kafka-run-class.sh kafka.tools.MirrorMaker", - "--consumer.config " + configPathName + "/" + mmConsumerConfigFile, - "--producer.config " + configPathName + "/" + mmProducerConfigFile, - "--whitelist=\".*\" >> ", - logPathName + "/" + logFile + " & echo pid:$! > ", - logPathName + "/entity_" + entityId + "_pid'"] + "'JAVA_HOME=" + javaHome, + "JMX_PORT=" + jmxPort, + kafkaHome + "/bin/kafka-run-class.sh kafka.tools.ConsoleConsumer", + "--zookeeper " + zkConnectStr, + "--topic " + topic, + "--consumer.config /tmp/consumer.properties", + "--csv-reporter-enabled", + formatterOption, + "--from-beginning", + " >> " + logPathName + "/" + logFile + " & echo pid:$! 
> ", + logPathName + "/entity_" + entityId + "_pid'"] cmdStr = " ".join(cmdList) logger.debug("executing command: [" + cmdStr + "]", extra=d) system_test_utils.async_sys_call(cmdStr) + logger.info("sleeping for 5 seconds.", extra=d) time.sleep(5) pidCmdStr = "ssh " + hostname + " 'cat " + logPathName + "/entity_" + entityId + "_pid' 2> /dev/null" @@ -761,6 +854,8 @@ def start_entity_in_background(systemTestEnv, testcaseEnv, entityId): testcaseEnv.entityBrokerParentPidDict[entityId] = tokens[1] elif role == "mirror_maker": testcaseEnv.entityMirrorMakerParentPidDict[entityId] = tokens[1] + elif role == "console_consumer": + testcaseEnv.entityConsoleConsumerParentPidDict[entityId] = tokens[1] def start_console_consumer(systemTestEnv, testcaseEnv): @@ -771,9 +866,9 @@ def start_console_consumer(systemTestEnv, testcaseEnv): for consumerConfig in consumerConfigList: host = consumerConfig["hostname"] entityId = consumerConfig["entity_id"] - jmxPort = consumerConfig["jmx_port"] + jmxPort = consumerConfig["jmx_port"] role = consumerConfig["role"] - clusterName = consumerConfig["cluster_name"] + clusterName = consumerConfig["cluster_name"] kafkaHome = system_test_utils.get_data_by_lookup_keyval(clusterList, "entity_id", entityId, "kafka_home") javaHome = system_test_utils.get_data_by_lookup_keyval(clusterList, "entity_id", entityId, "java_home") jmxPort = system_test_utils.get_data_by_lookup_keyval(clusterList, "entity_id", entityId, "jmx_port") @@ -835,13 +930,20 @@ def start_console_consumer(systemTestEnv, testcaseEnv): logger.error("Invalid cluster name : " + clusterName, extra=d) sys.exit(1) + consumerProperties = {} + consumerProperties["consumer.timeout.ms"] = timeoutMs + props_file_path=write_consumer_properties(consumerProperties) + scpCmdStr = "scp "+ props_file_path +" "+ host + ":/tmp/" + logger.debug("executing command [" + scpCmdStr + "]", extra=d) + system_test_utils.sys_call(scpCmdStr) + cmdList = ["ssh " + host, "'JAVA_HOME=" + javaHome, "JMX_PORT=" + jmxPort, - kafkaRunClassBin + " kafka.consumer.ConsoleConsumer", + kafkaRunClassBin + " kafka.tools.ConsoleConsumer", "--zookeeper " + zkConnectStr, "--topic " + topic, - "--consumer-timeout-ms " + timeoutMs, + "--consumer.config /tmp/consumer.properties", "--csv-reporter-enabled", #"--metrics-dir " + metricsDir, formatterOption, @@ -884,14 +986,16 @@ def start_producer_performance(systemTestEnv, testcaseEnv, kafka07Client): for producerConfig in producerConfigList: host = producerConfig["hostname"] entityId = producerConfig["entity_id"] - jmxPort = producerConfig["jmx_port"] - role = producerConfig["role"] + jmxPort = producerConfig["jmx_port"] + role = producerConfig["role"] thread.start_new_thread(start_producer_in_thread, (testcaseEnv, entityConfigList, producerConfig, kafka07Client)) + logger.debug("calling testcaseEnv.lock.acquire()", extra=d) testcaseEnv.lock.acquire() testcaseEnv.numProducerThreadsRunning += 1 logger.debug("testcaseEnv.numProducerThreadsRunning : " + str(testcaseEnv.numProducerThreadsRunning), extra=d) time.sleep(1) + logger.debug("calling testcaseEnv.lock.release()", extra=d) testcaseEnv.lock.release() def generate_topics_string(topicPrefix, numOfTopics): @@ -925,7 +1029,7 @@ def generate_topics_string(topicPrefix, numOfTopics): def start_producer_in_thread(testcaseEnv, entityConfigList, producerConfig, kafka07Client): host = producerConfig["hostname"] entityId = producerConfig["entity_id"] - jmxPort = producerConfig["jmx_port"] + jmxPort = producerConfig["jmx_port"] role = producerConfig["role"] clusterName = 
producerConfig["cluster_name"] kafkaHome = system_test_utils.get_data_by_lookup_keyval(entityConfigList, "entity_id", entityId, "kafka_home") @@ -933,6 +1037,9 @@ def start_producer_in_thread(testcaseEnv, entityConfigList, producerConfig, kafk jmxPort = system_test_utils.get_data_by_lookup_keyval(entityConfigList, "entity_id", entityId, "jmx_port") kafkaRunClassBin = kafkaHome + "/bin/kafka-run-class.sh" + # first keep track of its pid + testcaseEnv.producerHostParentPidDict[entityId] = os.getpid() + # get optional testcase arguments numTopicsForAutoGenString = -1 try: @@ -957,6 +1064,7 @@ def start_producer_in_thread(testcaseEnv, entityConfigList, producerConfig, kafk noMsgPerBatch = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "message") requestNumAcks = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "request-num-acks") syncMode = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "sync") + useNewProducer = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "new-producer") retryBackoffMs = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "producer-retry-backoff-ms") numOfRetries = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "producer-num-retries") @@ -995,6 +1103,8 @@ def start_producer_in_thread(testcaseEnv, entityConfigList, producerConfig, kafk boolArgumentsStr = "" if syncMode.lower() == "true": boolArgumentsStr = boolArgumentsStr + " --sync" + if useNewProducer.lower() == "true": + boolArgumentsStr = boolArgumentsStr + " --new-producer" # keep calling producer until signaled to stop by: # testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] @@ -1011,7 +1121,7 @@ def start_producer_in_thread(testcaseEnv, entityConfigList, producerConfig, kafk "'JAVA_HOME=" + javaHome, "JMX_PORT=" + jmxPort, "KAFKA_LOG4J_OPTS=-Dlog4j.configuration=file:%s/config/test-log4j.properties" % kafkaHome, - kafkaRunClassBin + " kafka.perf.ProducerPerformance", + kafkaRunClassBin + " kafka.tools.ProducerPerformance", "--broker-list " + brokerListStr, "--initial-message-id " + str(initMsgId), "--messages " + noMsgPerBatch, @@ -1026,7 +1136,8 @@ def start_producer_in_thread(testcaseEnv, entityConfigList, producerConfig, kafk "--metrics-dir " + metricsDir, boolArgumentsStr, " >> " + producerLogPathName, - " & echo pid:$! > " + producerLogPath + "/entity_" + entityId + "_pid'"] + " & echo $! > " + producerLogPath + "/entity_" + entityId + "_pid", + " & wait'"] if kafka07Client: cmdList[:] = [] @@ -1047,7 +1158,7 @@ def start_producer_in_thread(testcaseEnv, entityConfigList, producerConfig, kafk "'JAVA_HOME=" + javaHome, "JMX_PORT=" + jmxPort, "KAFKA_LOG4J_OPTS=-Dlog4j.configuration=file:%s/config/test-log4j.properties" % kafkaHome, - kafkaRunClassBin + " kafka.perf.ProducerPerformance", + kafkaRunClassBin + " kafka.tools.ProducerPerformance", "--brokerinfo " + brokerInfoStr, "--initial-message-id " + str(initMsgId), "--messages " + noMsgPerBatch, @@ -1057,17 +1168,20 @@ def start_producer_in_thread(testcaseEnv, entityConfigList, producerConfig, kafk "--message-size " + messageSize, "--vary-message-size --async", " >> " + producerLogPathName, - " & echo pid:$! > " + producerLogPath + "/entity_" + entityId + "_pid'"] + " & echo $! 
> " + producerLogPath + "/entity_" + entityId + "_pid", + " & wait'"] cmdStr = " ".join(cmdList) logger.debug("executing command: [" + cmdStr + "]", extra=d) subproc = system_test_utils.sys_call_return_subproc(cmdStr) - for line in subproc.stdout.readlines(): - pass # dummy loop to wait until producer is completed + logger.debug("waiting for producer to finish", extra=d) + subproc.communicate() + logger.debug("producer finished", extra=d) else: testcaseEnv.numProducerThreadsRunning -= 1 logger.debug("testcaseEnv.numProducerThreadsRunning : " + str(testcaseEnv.numProducerThreadsRunning), extra=d) + logger.debug("calling testcaseEnv.lock.release()", extra=d) testcaseEnv.lock.release() break @@ -1079,24 +1193,30 @@ def start_producer_in_thread(testcaseEnv, entityConfigList, producerConfig, kafk # wait until other producer threads also stops and # let the main testcase know all producers have stopped while 1: + logger.debug("calling testcaseEnv.lock.acquire()", extra=d) testcaseEnv.lock.acquire() time.sleep(1) if testcaseEnv.numProducerThreadsRunning == 0: testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = True + logger.debug("calling testcaseEnv.lock.release()", extra=d) testcaseEnv.lock.release() break else: logger.debug("waiting for TRUE of testcaseEnv.userDefinedEnvVarDict['backgroundProducerStopped']", extra=d) + logger.debug("calling testcaseEnv.lock.release()", extra=d) testcaseEnv.lock.release() time.sleep(1) + # finally remove itself from the tracking pids + del testcaseEnv.producerHostParentPidDict[entityId] + def stop_remote_entity(systemTestEnv, entityId, parentPid, signalType="SIGTERM"): clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList hostname = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "entity_id", entityId, "hostname") pidStack = system_test_utils.get_remote_child_processes(hostname, parentPid) - logger.debug("terminating (" + signalType + ") process id: " + parentPid + " in host: " + hostname, extra=d) + logger.info("terminating (" + signalType + ") process id: " + parentPid + " in host: " + hostname, extra=d) if signalType.lower() == "sigterm": system_test_utils.sigterm_remote_process(hostname, pidStack) @@ -1117,7 +1237,7 @@ def force_stop_remote_entity(systemTestEnv, entityId, parentPid): system_test_utils.sigkill_remote_process(hostname, pidStack) -def create_topic(systemTestEnv, testcaseEnv): +def create_topic_for_producer_performance(systemTestEnv, testcaseEnv): clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList prodPerfCfgList = system_test_utils.get_dict_from_list_of_dicts(clusterEntityConfigDictList, "role", "producer_performance") @@ -1149,7 +1269,7 @@ def create_topic(systemTestEnv, testcaseEnv): testcaseBaseDir = replace_kafka_home(testcaseBaseDir, kafkaHome) for topic in topicsList: - logger.info("creating topic: [" + topic + "] at: [" + zkConnectStr + "]", extra=d) + logger.info("creating topic: [" + topic + "] at: [" + zkConnectStr + "]", extra=d) cmdList = ["ssh " + zkHost, "'JAVA_HOME=" + javaHome, createTopicBin, @@ -1158,11 +1278,45 @@ def create_topic(systemTestEnv, testcaseEnv): " --replication-factor " + testcaseEnv.testcaseArgumentsDict["replica_factor"], " --partitions " + testcaseEnv.testcaseArgumentsDict["num_partition"] + " >> ", testcaseBaseDir + "/logs/create_source_cluster_topic.log'"] - + cmdStr = " ".join(cmdList) logger.debug("executing command: [" + cmdStr + "]", extra=d) subproc = system_test_utils.sys_call_return_subproc(cmdStr) +def 
create_topic(systemTestEnv, testcaseEnv, topic, replication_factor, num_partitions): + clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList + zkEntityId = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "role", "zookeeper", "entity_id") + kafkaHome = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "entity_id", zkEntityId, "kafka_home") + javaHome = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "entity_id", zkEntityId, "java_home") + createTopicBin = kafkaHome + "/bin/kafka-topics.sh --create" + zkConnectStr = "" + zkHost = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "role", "zookeeper", "hostname") + if len(testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"]) > 0: + zkConnectStr = testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"] + elif len(testcaseEnv.userDefinedEnvVarDict["targetZkConnectStr"]) > 0: + zkConnectStr = testcaseEnv.userDefinedEnvVarDict["targetZkConnectStr"] + else: + raise Exception("Empty zkConnectStr found") + + testcaseBaseDir = testcaseEnv.testCaseBaseDir + + testcaseBaseDir = replace_kafka_home(testcaseBaseDir, kafkaHome) + + logger.debug("creating topic: [" + topic + "] at: [" + zkConnectStr + "]", extra=d) + cmdList = ["ssh " + zkHost, + "'JAVA_HOME=" + javaHome, + createTopicBin, + " --topic " + topic, + " --zookeeper " + zkConnectStr, + " --replication-factor " + str(replication_factor), + " --partitions " + str(num_partitions) + " >> ", + testcaseBaseDir + "/logs/create_source_cluster_topic.log'"] + + cmdStr = " ".join(cmdList) + logger.info("executing command: [" + cmdStr + "]", extra=d) + subproc = system_test_utils.sys_call_return_subproc(cmdStr) + + def get_message_id(logPathName, topic=""): logLines = open(logPathName, "r").readlines() @@ -1200,7 +1354,7 @@ def get_message_checksum(logPathName): def validate_data_matched(systemTestEnv, testcaseEnv, replicationUtils): - logger.debug("#### Inside validate_data_matched", extra=d) + logger.info("#### Inside validate_data_matched", extra=d) validationStatusDict = testcaseEnv.validationStatusDict clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList @@ -1224,11 +1378,11 @@ def validate_data_matched(systemTestEnv, testcaseEnv, replicationUtils): consumerTopic = system_test_utils.get_data_by_lookup_keyval(testcaseEnv.testcaseConfigsList, "entity_id", consumerEntityId, "topic") if consumerTopic in topic: matchingConsumerEntityId = consumerEntityId - logger.debug("matching consumer entity id found", extra=d) + logger.info("matching consumer entity id found", extra=d) break if matchingConsumerEntityId is None: - logger.debug("matching consumer entity id NOT found", extra=d) + logger.info("matching consumer entity id NOT found", extra=d) break msgIdMissingInConsumerLogPathName = get_testcase_config_log_dir_pathname( \ @@ -1316,7 +1470,7 @@ def cleanup_data_at_remote_hosts(systemTestEnv, testcaseEnv): logger.info("cleaning up test case dir: [" + testCaseBaseDir + "]", extra=d) if "system_test" not in testCaseBaseDir: - logger.warn("possible destructive command [" + cmdStr + "]", extra=d) + # logger.warn("possible destructive command [" + cmdStr + "]", extra=d) logger.warn("check config file: system_test/cluster_config.properties", extra=d) logger.warn("aborting test...", extra=d) sys.exit(1) @@ -1416,7 +1570,7 @@ def ps_grep_terminate_running_entity(systemTestEnv): cmdStr = " ".join(cmdList) logger.debug("executing command [" + cmdStr + "]", extra=d) - 
system_test_utils.sys_call(cmdStr) + system_test_utils.sys_call(cmdStr) def get_reelection_latency(systemTestEnv, testcaseEnv, leaderDict, leaderAttributesDict): leaderEntityId = None @@ -1473,7 +1627,7 @@ def get_reelection_latency(systemTestEnv, testcaseEnv, leaderDict, leaderAttribu if shutdownTimestamp > 0: leaderReElectionLatency = float(leaderDict2["timestamp"]) - float(shutdownTimestamp) logger.info("leader Re-election Latency: " + str(leaderReElectionLatency) + " sec", extra=d) - + return leaderReElectionLatency @@ -1481,9 +1635,36 @@ def stop_all_remote_running_processes(systemTestEnv, testcaseEnv): entityConfigs = systemTestEnv.clusterEntityConfigDictList - for hostname, producerPPid in testcaseEnv.producerHostParentPidDict.items(): - producerEntityId = system_test_utils.get_data_by_lookup_keyval(entityConfigs, "hostname", hostname, "entity_id") - stop_remote_entity(systemTestEnv, producerEntityId, producerPPid) + # If there are any alive local threads that keep starting remote producer performance, we need to kill them; + # note we do not need to stop remote processes since they will terminate themselves eventually. + if len(testcaseEnv.producerHostParentPidDict) != 0: + # ============================================= + # tell producer to stop + # ============================================= + logger.debug("calling testcaseEnv.lock.acquire()", extra=d) + testcaseEnv.lock.acquire() + testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = True + logger.debug("calling testcaseEnv.lock.release()", extra=d) + testcaseEnv.lock.release() + + # ============================================= + # wait for producer thread's update of + # "backgroundProducerStopped" to be "True" + # ============================================= + while 1: + logger.debug("calling testcaseEnv.lock.acquire()", extra=d) + testcaseEnv.lock.acquire() + logger.info("status of backgroundProducerStopped : [" + \ + str(testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]) + "]", extra=d) + if testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]: + logger.debug("calling testcaseEnv.lock.release()", extra=d) + testcaseEnv.lock.release() + logger.info("all producer threads completed", extra=d) + break + logger.debug("calling testcaseEnv.lock.release()", extra=d) + testcaseEnv.lock.release() + + testcaseEnv.producerHostParentPidDict.clear() for hostname, consumerPPid in testcaseEnv.consumerHostParentPidDict.items(): consumerEntityId = system_test_utils.get_data_by_lookup_keyval(entityConfigs, "hostname", hostname, "entity_id") @@ -1496,6 +1677,9 @@ def stop_all_remote_running_processes(systemTestEnv, testcaseEnv): for entityId, mirrorMakerParentPid in testcaseEnv.entityMirrorMakerParentPidDict.items(): stop_remote_entity(systemTestEnv, entityId, mirrorMakerParentPid) + for entityId, consumerParentPid in testcaseEnv.entityConsoleConsumerParentPidDict.items(): + stop_remote_entity(systemTestEnv, entityId, consumerParentPid) + for entityId, brokerParentPid in testcaseEnv.entityBrokerParentPidDict.items(): stop_remote_entity(systemTestEnv, entityId, brokerParentPid) @@ -1514,8 +1698,8 @@ def start_migration_tool(systemTestEnv, testcaseEnv, onlyThisEntityId=None): if onlyThisEntityId is None or entityId == onlyThisEntityId: host = migrationToolConfig["hostname"] - jmxPort = migrationToolConfig["jmx_port"] - role = migrationToolConfig["role"] + jmxPort = migrationToolConfig["jmx_port"] + role = migrationToolConfig["role"] kafkaHome = system_test_utils.get_data_by_lookup_keyval(clusterConfigList, 
"entity_id", entityId, "kafka_home") javaHome = system_test_utils.get_data_by_lookup_keyval(clusterConfigList, "entity_id", entityId, "java_home") jmxPort = system_test_utils.get_data_by_lookup_keyval(clusterConfigList, "entity_id", entityId, "jmx_port") @@ -1581,7 +1765,7 @@ def validate_07_08_migrated_data_matched(systemTestEnv, testcaseEnv): producerEntityId = prodPerfCfg["entity_id"] topic = system_test_utils.get_data_by_lookup_keyval(testcaseEnv.testcaseConfigsList, "entity_id", producerEntityId, "topic") - consumerEntityIdList = system_test_utils.get_data_from_list_of_dicts( + consumerEntityIdList = system_test_utils.get_data_from_list_of_dicts( clusterEntityConfigDictList, "role", "console_consumer", "entity_id") matchingConsumerEntityId = None @@ -1595,7 +1779,7 @@ def validate_07_08_migrated_data_matched(systemTestEnv, testcaseEnv): if matchingConsumerEntityId is None: break - msgChecksumMissingInConsumerLogPathName = get_testcase_config_log_dir_pathname( + msgChecksumMissingInConsumerLogPathName = get_testcase_config_log_dir_pathname( testcaseEnv, "console_consumer", matchingConsumerEntityId, "default") \ + "/msg_checksum_missing_in_consumer.log" producerLogPath = get_testcase_config_log_dir_pathname(testcaseEnv, "producer_performance", producerEntityId, "default") @@ -1680,7 +1864,7 @@ def validate_broker_log_segment_checksum(systemTestEnv, testcaseEnv, clusterName # |- 00000000000000000020.log # |- . . . - # loop through all topicPartition directories such as : test_1-0, test_1-1, ... + # loop through all topicPartition directories such as : test_1-0, test_1-1, ... for topicPartition in os.listdir(localLogSegmentPath): # found a topic-partition directory if os.path.isdir(localLogSegmentPath + "/" + topicPartition): @@ -1733,7 +1917,7 @@ def validate_broker_log_segment_checksum(systemTestEnv, testcaseEnv, clusterName # 'test_2-0' : ['d41d8cd98f00b204e9800998ecf8427e','d41d8cd98f00b204e9800998ecf8427e'], # 'test_2-1' : ['d41d8cd98f00b204e9800998ecf8427e','d41d8cd98f00b204e9800998ecf8427e'] # } - + for brokerTopicPartitionKey, md5Checksum in brokerLogCksumDict.items(): tokens = brokerTopicPartitionKey.split(":") brokerKey = tokens[0] @@ -1759,7 +1943,7 @@ def validate_broker_log_segment_checksum(systemTestEnv, testcaseEnv, clusterName logger.debug("merged log segment checksum in " + topicPartition + " matched", extra=d) else: logger.error("unexpected error in " + topicPartition, extra=d) - + if failureCount == 0: validationStatusDict["Validate for merged log segment checksum in cluster [" + clusterName + "]"] = "PASSED" else: @@ -1772,8 +1956,8 @@ def start_simple_consumer(systemTestEnv, testcaseEnv, minStartingOffsetDict=None for consumerConfig in consumerConfigList: host = consumerConfig["hostname"] entityId = consumerConfig["entity_id"] - jmxPort = consumerConfig["jmx_port"] - clusterName = consumerConfig["cluster_name"] + jmxPort = consumerConfig["jmx_port"] + clusterName = consumerConfig["cluster_name"] kafkaHome = system_test_utils.get_data_by_lookup_keyval(clusterList, "entity_id", entityId, "kafka_home") javaHome = system_test_utils.get_data_by_lookup_keyval(clusterList, "entity_id", entityId, "java_home") kafkaRunClassBin = kafkaHome + "/bin/kafka-run-class.sh" @@ -1837,16 +2021,16 @@ def start_simple_consumer(systemTestEnv, testcaseEnv, minStartingOffsetDict=None "--no-wait-at-logend ", " > " + outputFilePathName, " & echo pid:$! 
> " + consumerLogPath + "/entity_" + entityId + "_pid'"] - + cmdStr = " ".join(cmdList) - + logger.debug("executing command: [" + cmdStr + "]", extra=d) subproc_1 = system_test_utils.sys_call_return_subproc(cmdStr) # dummy for-loop to wait until the process is completed for line in subproc_1.stdout.readlines(): - pass + pass time.sleep(1) - + partitionId += 1 replicaIndex += 1 @@ -1855,7 +2039,7 @@ def get_controller_attributes(systemTestEnv, testcaseEnv): logger.info("Querying Zookeeper for Controller info ...", extra=d) # keep track of controller data in this dict such as broker id & entity id - controllerDict = {} + controllerDict = {} clusterConfigsList = systemTestEnv.clusterEntityConfigDictList tcConfigsList = testcaseEnv.testcaseConfigsList @@ -1871,7 +2055,7 @@ def get_controller_attributes(systemTestEnv, testcaseEnv): cmdStrList = ["ssh " + hostname, "\"JAVA_HOME=" + javaHome, - kafkaRunClassBin + " org.apache.zookeeper.ZooKeeperMain", + kafkaRunClassBin + " kafka.tools.ZooKeeperMainWrapper ", "-server " + testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"], "get /controller 2> /dev/null | tail -1\""] @@ -1910,7 +2094,7 @@ def getMinCommonStartingOffset(systemTestEnv, testcaseEnv, clusterName="source") logPathName = get_testcase_config_log_dir_pathname(testcaseEnv, "broker", brokerEntityId, "default") localLogSegmentPath = logPathName + "/" + remoteLogSegmentDir - # loop through all topicPartition directories such as : test_1-0, test_1-1, ... + # loop through all topicPartition directories such as : test_1-0, test_1-1, ... for topicPartition in sorted(os.listdir(localLogSegmentPath)): # found a topic-partition directory if os.path.isdir(localLogSegmentPath + "/" + topicPartition): @@ -1949,7 +2133,7 @@ def getMinCommonStartingOffset(systemTestEnv, testcaseEnv, clusterName="source") # u'3:test_2-0': '0', # u'3:test_2-1': '0'} - # loop through brokerLogStartOffsetDict to get the min common starting offset for each topic-partition + # loop through brokerLogStartOffsetDict to get the min common starting offset for each topic-partition for brokerTopicPartition in sorted(brokerLogStartOffsetDict.iterkeys()): topicPartition = brokerTopicPartition.split(':')[1] @@ -2076,7 +2260,6 @@ def validate_data_matched_in_multi_topics_from_single_consumer_producer(systemTe clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList prodPerfCfgList = system_test_utils.get_dict_from_list_of_dicts(clusterEntityConfigDictList, "role", "producer_performance") - consumerCfgList = system_test_utils.get_dict_from_list_of_dicts(clusterEntityConfigDictList, "role", "console_consumer") for prodPerfCfg in prodPerfCfgList: producerEntityId = prodPerfCfg["entity_id"] @@ -2104,7 +2287,7 @@ def validate_data_matched_in_multi_topics_from_single_consumer_producer(systemTe topicList = topicStr.split(',') for topic in topicList: consumerDuplicateCount = 0 - msgIdMissingInConsumerLogPathName = get_testcase_config_log_dir_pathname( + msgIdMissingInConsumerLogPathName = get_testcase_config_log_dir_pathname( testcaseEnv, "console_consumer", matchingConsumerEntityId, "default") \ + "/msg_id_missing_in_consumer_" + topic + ".log" producerMsgIdList = get_message_id(producerLogPathName, topic) @@ -2120,8 +2303,11 @@ def validate_data_matched_in_multi_topics_from_single_consumer_producer(systemTe outfile.write(id + "\n") outfile.close() + logger.info("Producer entity id " + producerEntityId, extra=d) + logger.info("Consumer entity id " + matchingConsumerEntityId, extra=d) logger.info("no. 
of unique messages on topic [" + topic + "] sent from publisher : " + str(len(producerMsgIdSet)), extra=d) logger.info("no. of unique messages on topic [" + topic + "] received by consumer : " + str(len(consumerMsgIdSet)), extra=d) + logger.info("no. of duplicate messages on topic [" + topic + "] received by consumer: " + str(consumerDuplicateCount), extra=d) validationStatusDict["Unique messages from producer on [" + topic + "]"] = str(len(producerMsgIdSet)) validationStatusDict["Unique messages from consumer on [" + topic + "]"] = str(len(consumerMsgIdSet)) @@ -2214,18 +2400,58 @@ def validate_index_log(systemTestEnv, testcaseEnv, clusterName="source"): logger.debug("#### error found [" + line + "]", extra=d) failureCount += 1 showMismatchedIndexOffset = True + if subproc.wait() != 0: + logger.debug("#### error found [DumpLogSegments exited abnormally]", extra=d) + failureCount += 1 if failureCount == 0: validationStatusDict["Validate index log in cluster [" + clusterName + "]"] = "PASSED" else: validationStatusDict["Validate index log in cluster [" + clusterName + "]"] = "FAILED" +def get_leader_for(systemTestEnv, testcaseEnv, topic, partition): + logger.info("Querying Zookeeper for leader info for topic " + topic, extra=d) + clusterConfigsList = systemTestEnv.clusterEntityConfigDictList + tcConfigsList = testcaseEnv.testcaseConfigsList + + zkDictList = system_test_utils.get_dict_from_list_of_dicts(clusterConfigsList, "role", "zookeeper") + firstZkDict = zkDictList[0] + hostname = firstZkDict["hostname"] + zkEntityId = firstZkDict["entity_id"] + clientPort = system_test_utils.get_data_by_lookup_keyval(tcConfigsList, "entity_id", zkEntityId, "clientPort") + kafkaHome = system_test_utils.get_data_by_lookup_keyval(clusterConfigsList, "entity_id", zkEntityId, "kafka_home") + javaHome = system_test_utils.get_data_by_lookup_keyval(clusterConfigsList, "entity_id", zkEntityId, "java_home") + kafkaRunClassBin = kafkaHome + "/bin/kafka-run-class.sh" + + zkQueryStr = "get /brokers/topics/" + topic + "/partitions/" + str(partition) + "/state" + brokerid = '' + leaderEntityId = '' + + cmdStrList = ["ssh " + hostname, + "\"JAVA_HOME=" + javaHome, + kafkaRunClassBin + " kafka.tools.ZooKeeperMainWrapper ", + "-server " + testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"], + zkQueryStr + " 2> /dev/null | tail -1\""] + cmdStr = " ".join(cmdStrList) + logger.info("executing command [" + cmdStr + "]", extra=d) + subproc = system_test_utils.sys_call_return_subproc(cmdStr) + for line in subproc.stdout.readlines(): + if "\"leader\"" in line: + line = line.rstrip('\n') + json_data = json.loads(line) + for key,val in json_data.items(): + if key == 'leader': + brokerid = str(val) + leaderEntityId = system_test_utils.get_data_by_lookup_keyval(tcConfigsList, "broker.id", brokerid, "entity_id") + break + return leaderEntityId + def get_leader_attributes(systemTestEnv, testcaseEnv): logger.info("Querying Zookeeper for leader info ...", extra=d) # keep track of leader data in this dict such as broker id & entity id - leaderDict = {} + leaderDict = {} clusterConfigsList = systemTestEnv.clusterEntityConfigDictList tcConfigsList = testcaseEnv.testcaseConfigsList @@ -2247,15 +2473,14 @@ def get_leader_attributes(systemTestEnv, testcaseEnv): cmdStrList = ["ssh " + hostname, "\"JAVA_HOME=" + javaHome, - kafkaRunClassBin + " org.apache.zookeeper.ZooKeeperMain", + kafkaRunClassBin + " kafka.tools.ZooKeeperMainWrapper ", "-server " + testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"], zkQueryStr + " 2> /dev/null | 
tail -1\""] cmdStr = " ".join(cmdStrList) - logger.debug("executing command [" + cmdStr + "]", extra=d) + logger.info("executing command [" + cmdStr + "]", extra=d) subproc = system_test_utils.sys_call_return_subproc(cmdStr) for line in subproc.stdout.readlines(): - logger.debug("zk returned : " + line, extra=d) if "\"leader\"" in line: line = line.rstrip('\n') json_data = json.loads(line) @@ -2275,5 +2500,12 @@ def get_leader_attributes(systemTestEnv, testcaseEnv): print leaderDict return leaderDict - +def write_consumer_properties(consumerProperties): + import tempfile + props_file_path = tempfile.gettempdir() + "/consumer.properties" + consumer_props_file=open(props_file_path,"w") + for key,value in consumerProperties.iteritems(): + consumer_props_file.write(key+"="+value+"\n") + consumer_props_file.close() + return props_file_path diff --git a/system_test/utils/metrics.py b/system_test/utils/metrics.py index d98d3cdeab00b..3e663483202a1 100644 --- a/system_test/utils/metrics.py +++ b/system_test/utils/metrics.py @@ -106,6 +106,8 @@ def ensure_valid_headers(headers, attributes): " headers: {0}".format(",".join(headers))) def plot_graphs(inputCsvFiles, labels, title, xLabel, yLabel, attribute, outputGraphFile): + if not inputCsvFiles: return + # create empty plot fig=plt.figure() fig.subplots_adjust(bottom=0.2) diff --git a/system_test/utils/system_test_utils.py b/system_test/utils/system_test_utils.py index 50340f067774e..e8529cd31f920 100644 --- a/system_test/utils/system_test_utils.py +++ b/system_test/utils/system_test_utils.py @@ -360,19 +360,23 @@ def setup_remote_hosts(systemTestEnv): clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList localKafkaHome = os.path.abspath(systemTestEnv.SYSTEM_TEST_BASE_DIR + "/..") - localJavaBin = "" - localJavaHome = "" - subproc = sys_call_return_subproc("which java") - for line in subproc.stdout.readlines(): - if line.startswith("which: no "): - logger.error("No Java binary found in local host", extra=d) - return False - else: - line = line.rstrip('\n') - localJavaBin = line - matchObj = re.match("(.*)\/bin\/java$", line) - localJavaHome = matchObj.group(1) + # when configuring "default" java_home, use JAVA_HOME environment variable, if exists + # otherwise, use the directory with the java binary + localJavaHome = os.environ.get('JAVA_HOME') + if localJavaHome is not None: + localJavaBin = localJavaHome + '/bin/java' + else: + subproc = sys_call_return_subproc("which java") + for line in subproc.stdout.readlines(): + if line.startswith("which: no "): + logger.error("No Java binary found in local host", extra=d) + return False + else: + line = line.rstrip('\n') + localJavaBin = line + matchObj = re.match("(.*)\/bin\/java$", line) + localJavaHome = matchObj.group(1) listIndex = -1 for clusterEntityConfigDict in clusterEntityConfigDictList: diff --git a/system_test/utils/testcase_env.py b/system_test/utils/testcase_env.py index bee87166510b7..1d2fb5762f531 100644 --- a/system_test/utils/testcase_env.py +++ b/system_test/utils/testcase_env.py @@ -28,62 +28,67 @@ import system_test_utils class TestcaseEnv(): + def __init__(self, systemTestEnv, classInstance): + self.systemTestEnv = systemTestEnv - # ================================ - # Generic testcase environment - # ================================ - - # dictionary of entity_id to ppid for Zookeeper entities - # key: entity_id - # val: ppid of Zookeeper associated to that entity_id - # { 0: 12345, 1: 12389, ... 
} - entityZkParentPidDict = {} - - # dictionary of entity_id to ppid for broker entities - # key: entity_id - # val: ppid of broker associated to that entity_id - # { 0: 12345, 1: 12389, ... } - entityBrokerParentPidDict = {} - - # dictionary of entity_id to ppid for mirror-maker entities - # key: entity_id - # val: ppid of broker associated to that entity_id - # { 0: 12345, 1: 12389, ... } - entityMirrorMakerParentPidDict = {} - - # dictionary of entity_id to ppid for migration tool entities - # key: entity_id - # val: ppid of broker associated to that entity_id - # { 0: 12345, 1: 12389, ... } - entityMigrationToolParentPidDict = {} - - # dictionary of entity_id to list of JMX ppid - # key: entity_id - # val: list of JMX ppid associated to that entity_id - # { 1: [1234, 1235, 1236], 2: [2234, 2235, 2236], ... } - entityJmxParentPidDict = {} - - # dictionary of hostname-topic-ppid for consumer - # key: hostname - # val: dict of topic-ppid - # { host1: { test1 : 12345 }, host1: { test2 : 12389 }, ... } - consumerHostParentPidDict = {} - - # dictionary of hostname-topic-ppid for producer - # key: hostname - # val: dict of topic-ppid - # { host1: { test1 : 12345 }, host1: { test2 : 12389 }, ... } - producerHostParentPidDict = {} - - # list of testcase configs - testcaseConfigsList = [] - - # dictionary to keep track of testcase arguments such as replica_factor, num_partition - testcaseArgumentsDict = {} + # ================================ + # Generic testcase environment + # ================================ + # dictionary of entity_id to ppid for Zookeeper entities + # key: entity_id + # val: ppid of Zookeeper associated to that entity_id + # { 0: 12345, 1: 12389, ... } + self.entityZkParentPidDict = {} + + # dictionary of entity_id to ppid for broker entities + # key: entity_id + # val: ppid of broker associated to that entity_id + # { 0: 12345, 1: 12389, ... } + self.entityBrokerParentPidDict = {} + + # dictionary of entity_id to ppid for mirror-maker entities + # key: entity_id + # val: ppid of broker associated to that entity_id + # { 0: 12345, 1: 12389, ... } + self.entityMirrorMakerParentPidDict = {} + + # dictionary of entity_id to ppid for console-consumer entities + # key: entity_id + # val: ppid of console consumer associated to that entity_id + # { 0: 12345, 1: 12389, ... } + self.entityConsoleConsumerParentPidDict = {} + + # dictionary of entity_id to ppid for migration tool entities + # key: entity_id + # val: ppid of broker associated to that entity_id + # { 0: 12345, 1: 12389, ... } + self.entityMigrationToolParentPidDict = {} + + # dictionary of entity_id to list of JMX ppid + # key: entity_id + # val: list of JMX ppid associated to that entity_id + # { 1: [1234, 1235, 1236], 2: [2234, 2235, 2236], ... } + self.entityJmxParentPidDict = {} + + # dictionary of hostname-topic-ppid for consumer + # key: hostname + # val: dict of topic-ppid + # { host1: { test1 : 12345 }, host1: { test2 : 12389 }, ... } + self.consumerHostParentPidDict = {} + + # dictionary of hostname-topic-ppid for producer + # key: hostname + # val: dict of topic-ppid + # { host1: { test1 : 12345 }, host1: { test2 : 12389 }, ... 
} + self.producerHostParentPidDict = {} + + # list of testcase configs + self.testcaseConfigsList = [] + + # dictionary to keep track of testcase arguments such as replica_factor, num_partition + self.testcaseArgumentsDict = {} - def __init__(self, systemTestEnv, classInstance): - self.systemTestEnv = systemTestEnv # gather the test case related info and add to an SystemTestEnv object self.testcaseResultsDict = {} diff --git a/topics.json b/topics.json new file mode 100644 index 0000000000000..ff011ed381e78 --- /dev/null +++ b/topics.json @@ -0,0 +1,4 @@ +{"topics": + [{"topic": "foo"}], + "version":1 + } diff --git a/vagrant/README.md b/vagrant/README.md new file mode 100644 index 0000000000000..73cf0390bc4c7 --- /dev/null +++ b/vagrant/README.md @@ -0,0 +1,126 @@ +# Apache Kafka # + +Using Vagrant to get up and running. + +1) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/) +2) Install Vagrant >= 1.6.4 [http://www.vagrantup.com/](http://www.vagrantup.com/) +3) Install Vagrant Plugins: + + # Required + $ vagrant plugin install vagrant-hostmanager + # Optional + $ vagrant plugin install vagrant-cachier # Caches & shares package downloads across VMs + +In the main Kafka folder, do a normal Kafka build: + + $ gradle + $ ./gradlew jar + +You can override default settings in `Vagrantfile.local`, which is a Ruby file +that is ignored by git and imported into the Vagrantfile. +One setting you likely want to enable +in `Vagrantfile.local` is `enable_dns = true` to put hostnames in the host's +/etc/hosts file. You probably want this to avoid having to use IP addresses when +addressing the cluster from outside the VMs, e.g. if you run a client on the +host. It's disabled by default since it requires `sudo` access, mucks with your +system state, and breaks with naming conflicts if you try to run multiple +clusters concurrently. + +Now bring up the cluster: + + $ vagrant up --no-provision && vagrant provision + +We separate out the two steps (bringing up the base VMs and configuring them) +due to current limitations in ZooKeeper (ZOOKEEPER-1506) that require us to +collect IPs for all nodes before starting ZooKeeper nodes. + +Once this completes: + +* Zookeeper will be running on 192.168.50.11 (and `zk1` if you used enable_dns) +* Broker 1 on 192.168.50.51 (and `broker1` if you used enable_dns) +* Broker 2 on 192.168.50.52 (and `broker2` if you used enable_dns) +* Broker 3 on 192.168.50.53 (and `broker3` if you used enable_dns) + +To log into one of the machines: + + vagrant ssh + +You can access the brokers and zookeeper by their IP or hostname, e.g. + + # Specify ZooKeeper node 1 by it's IP: 192.168.50.11 + bin/kafka-topics.sh --create --zookeeper 192.168.50.11:2181 --replication-factor 3 --partitions 1 --topic sandbox + + # Specify brokers by their hostnames: broker1, broker2, broker3 + bin/kafka-console-producer.sh --broker-list broker1:9092,broker2:9092,broker3:9092 --topic sandbox + + # Specify ZooKeeper node by its hostname: zk1 + bin/kafka-console-consumer.sh --zookeeper zk1:2181 --topic sandbox --from-beginning + +If you need to update the running cluster, you can re-run the provisioner (the +step that installs software and configures services): + + vagrant provision + +Note that this doesn't currently ensure a fresh start -- old cluster state will +still remain intact after everything restarts. This can be useful for updating +the cluster to your most recent development version. 
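As a concrete sketch of the `Vagrantfile.local` overrides described above and in the Configuration section below, the snippet that follows creates a minimal override file. The setting names (`enable_dns`, `num_zookeepers`, `num_brokers`) are the ones this README documents; the values are purely illustrative.

    # Vagrantfile.local is plain Ruby; a heredoc is just one convenient way to create it
    $ cat > Vagrantfile.local <<'EOF'
    enable_dns = true
    num_zookeepers = 1
    num_brokers = 3
    EOF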
+ +Finally, you can clean up the cluster by destroying all the VMs: + + vagrant destroy + +## Configuration ## + +You can override some default settings by specifying the values in +`Vagrantfile.local`. It is interpreted as a Ruby file, although you'll probably +only ever need to change a few simple configuration variables. Some values you +might want to override: + +* `enable_dns` - Register each VM with a hostname in /etc/hosts on the + hosts. Hostnames are always set in the /etc/hosts in the VMs, so this is only + necessary if you want to address them conveniently from the host for tasks + that aren't provided by Vagrant. +* `num_zookeepers` - Size of zookeeper cluster +* `num_brokers` - Number of broker instances to run + + +## Using Other Providers ## + +### EC2 ### + +Install the `vagrant-aws` plugin to provide EC2 support: + + $ vagrant plugin install vagrant-aws + +Next, configure parameters in `Vagrantfile.local`. A few are *required*: +`enable_dns`, `ec2_access_key`, `ec2_secret_key`, `ec2_keypair_name`, `ec2_keypair_file`, and +`ec2_security_groups`. A couple of important notes: + +1. You definitely want to use `enable_dns` if you plan to run clients outside of + the cluster (e.g. from your local host). If you don't, you'll need to go + lookup `vagrant ssh-config`. + +2. You'll have to setup a reasonable security group yourself. You'll need to + open ports for Zookeeper (2888 & 3888 between ZK nodes, 2181 for clients) and + Kafka (9092). Beware that opening these ports to all sources (e.g. so you can + run producers/consumers locally) will allow anyone to access your Kafka + cluster. All other settings have reasonable defaults for setting up an + Ubuntu-based cluster, but you may want to customize instance type, region, + AMI, etc. + +3. `ec2_access_key` and `ec2_secret_key` will use the environment variables + `AWS_ACCESS_KEY` and `AWS_SECRET_KEY` respectively if they are set and not + overridden in `Vagrantfile.local`. + +4. If you're launching into a VPC, you must specify `ec2_subnet_id` (the subnet + in which to launch the nodes) and `ec2_security_groups` must be a list of + security group IDs instead of names, e.g. `sg-34fd3551` instead of + `kafka-test-cluster`. + +Now start things up, but specify the aws provider: + + $ vagrant up --provider=aws --no-parallel --no-provision && vagrant provision + +Your instances should get tagged with a name including your hostname to make +them identifiable and make it easier to track instances in the AWS management +console. diff --git a/vagrant/base.sh b/vagrant/base.sh new file mode 100644 index 0000000000000..6f28dfed67877 --- /dev/null +++ b/vagrant/base.sh @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
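Pulling the EC2 notes above together, a hypothetical end-to-end bring-up on AWS could look like the sketch below. The environment variables and `vagrant` commands are the ones named in that section; the placeholder values are assumptions, and the required `enable_dns`, `ec2_keypair_name`, `ec2_keypair_file`, and `ec2_security_groups` settings still have to be present in `Vagrantfile.local`.

    # Credentials can come from the environment instead of Vagrantfile.local
    $ export AWS_ACCESS_KEY=<your-access-key>     # placeholder
    $ export AWS_SECRET_KEY=<your-secret-key>     # placeholder
    $ vagrant plugin install vagrant-aws
    $ vagrant up --provider=aws --no-parallel --no-provision && vagrant provision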
+ +#!/bin/bash + +set -e + +if [ -z `which javac` ]; then + apt-get -y update + apt-get install -y software-properties-common python-software-properties + add-apt-repository -y ppa:webupd8team/java + apt-get -y update + + # Try to share cache. See Vagrantfile for details + mkdir -p /var/cache/oracle-jdk7-installer + if [ -e "/tmp/oracle-jdk7-installer-cache/" ]; then + find /tmp/oracle-jdk7-installer-cache/ -not -empty -exec cp '{}' /var/cache/oracle-jdk7-installer/ \; + fi + + /bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections + apt-get -y install oracle-java7-installer oracle-java7-set-default + + if [ -e "/tmp/oracle-jdk7-installer-cache/" ]; then + cp -R /var/cache/oracle-jdk7-installer/* /tmp/oracle-jdk7-installer-cache + fi +fi + +chmod a+rw /opt +if [ ! -e /opt/kafka ]; then + ln -s /vagrant /opt/kafka +fi diff --git a/bin/run-rat.sh b/vagrant/broker.sh old mode 100755 new mode 100644 similarity index 50% rename from bin/run-rat.sh rename to vagrant/broker.sh index 1b7bc312e8b42..63f2d4f30c5a4 --- a/bin/run-rat.sh +++ b/vagrant/broker.sh @@ -1,4 +1,3 @@ -#!/bin/bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -14,22 +13,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -base_dir=$(dirname $0)/.. -rat_excludes_file=$base_dir/.rat-excludes +#!/bin/bash + +# Usage: brokers.sh -if [ -z "$JAVA_HOME" ]; then - JAVA="java" -else - JAVA="$JAVA_HOME/bin/java" -fi +set -e -rat_command="$JAVA -jar $base_dir/lib/apache-rat-0.8.jar --dir $base_dir " +BROKER_ID=$1 +PUBLIC_ADDRESS=$2 +PUBLIC_ZOOKEEPER_ADDRESSES=$3 -for f in $(cat $rat_excludes_file); -do - rat_command="${rat_command} -e $f" -done +cd /opt/kafka -echo "Running " $rat_command -$rat_command > $base_dir/rat.out +sed \ + -e 's/broker.id=0/'broker.id=$BROKER_ID'/' \ + -e 's/#advertised.host.name=/'advertised.host.name=$PUBLIC_ADDRESS'/' \ + -e 's/zookeeper.connect=localhost:2181/'zookeeper.connect=$PUBLIC_ZOOKEEPER_ADDRESSES'/' \ + /opt/kafka/config/server.properties > /opt/kafka/config/server-$BROKER_ID.properties +echo "Killing server" +bin/kafka-server-stop.sh || true +sleep 5 # Because kafka-server-stop.sh doesn't actually wait +echo "Starting server" +bin/kafka-server-start.sh /opt/kafka/config/server-$BROKER_ID.properties 1>> /tmp/broker.log 2>> /tmp/broker.log & diff --git a/vagrant/zk.sh b/vagrant/zk.sh new file mode 100644 index 0000000000000..15517f826461d --- /dev/null +++ b/vagrant/zk.sh @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
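For orientation, `broker.sh` above and `zk.sh` below are per-node provisioning helpers: each derives a node-specific copy of the stock config (`server-$BROKER_ID.properties` or `zookeeper-$ZKID.properties`), stops any running instance, and restarts the service in the background. Their arguments are positional: broker id, public address, and ZooKeeper connect string for `broker.sh`; ZooKeeper id and ensemble size for `zk.sh`. The Vagrantfile that actually invokes them is not part of this hunk, so the calls below are only a hypothetical manual run that reuses the addresses from `vagrant/README.md` and the `/opt/kafka` symlink created by `base.sh`.

    # Run inside the corresponding VMs (the scripts are not marked executable)
    vagrant@zk1$     bash /opt/kafka/vagrant/zk.sh 1 1
    vagrant@broker1$ bash /opt/kafka/vagrant/broker.sh 1 192.168.50.51 192.168.50.11:2181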
+ +#!/bin/bash + +# Usage: zk.sh + +set -e + +ZKID=$1 +NUM_ZK=$2 + +cd /opt/kafka + +cp /opt/kafka/config/zookeeper.properties /opt/kafka/config/zookeeper-$ZKID.properties +echo "initLimit=5" >> /opt/kafka/config/zookeeper-$ZKID.properties +echo "syncLimit=2" >> /opt/kafka/config/zookeeper-$ZKID.properties +echo "quorumListenOnAllIPs=true" >> /opt/kafka/config/zookeeper-$ZKID.properties +for i in `seq 1 $NUM_ZK`; do + echo "server.${i}=zk${i}:2888:3888" >> /opt/kafka/config/zookeeper-$ZKID.properties +done + +mkdir -p /tmp/zookeeper +echo "$ZKID" > /tmp/zookeeper/myid + +echo "Killing ZooKeeper" +bin/zookeeper-server-stop.sh || true +sleep 5 # Because kafka-server-stop.sh doesn't actually wait +echo "Starting ZooKeeper" +bin/zookeeper-server-start.sh config/zookeeper-$ZKID.properties 1>> /tmp/zk.log 2>> /tmp/zk.log & diff --git a/wrapper.gradle b/wrapper.gradle new file mode 100644 index 0000000000000..bc6350632e2a6 --- /dev/null +++ b/wrapper.gradle @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +defaultTasks 'downloadWrapper' + +task downloadWrapper(type: Wrapper) { + description = "Download the gradle wrapper and requisite files. Overwrites existing wrapper files." + gradleVersion = project.gradleVersion +} \ No newline at end of file
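The `downloadWrapper` task above is what backs the two-step build flow shown earlier in `vagrant/README.md`: one run with a locally installed Gradle fetches the wrapper scripts, and every later build goes through `./gradlew`. Assuming the root build script wires this file in and defines `gradleVersion` (that wiring sits outside this hunk), bootstrapping looks like:

    $ gradle          # first run only: downloads gradlew and the wrapper jar
    $ ./gradlew jar   # subsequent builds use the generated wrapper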