Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Threading for async batching #330

Merged
merged 5 commits on Mar 8, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions kafka/conn.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ def __init__(self, host, port, timeout=DEFAULT_SOCKET_TIMEOUT_SECONDS):

self.reinit()

def __getnewargs__(self):
return (self.host, self.port, self.timeout)

def __repr__(self):
return "<KafkaConnection host=%s port=%d>" % (self.host, self.port)

Expand Down
52 changes: 26 additions & 26 deletions kafka/producer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
import time

try:
from queue import Empty
from queue import Empty, Queue
except ImportError:
from Queue import Empty
from Queue import Empty, Queue
from collections import defaultdict
from multiprocessing import Queue, Process

from threading import Thread, Event

import six

Expand All @@ -26,20 +27,15 @@


def _send_upstream(queue, client, codec, batch_time, batch_size,
req_acks, ack_timeout):
req_acks, ack_timeout, stop_event):
"""
Listen on the queue for a specified number of messages or till
a specified timeout and send them upstream to the brokers in one
request

NOTE: Ideally, this should have been a method inside the Producer
class. However, multiprocessing module has issues in windows. The
functionality breaks unless this function is kept outside of a class
"""
stop = False
client.reinit()

while not stop:
while not stop_event.is_set():
timeout = batch_time
count = batch_size
send_at = time.time() + timeout
Expand All @@ -56,7 +52,7 @@ def _send_upstream(queue, client, codec, batch_time, batch_size,

# Check if the controller has requested us to stop
if topic_partition == STOP_ASYNC_PRODUCER:
stop = True
stop_event.set()
break

# Adjust the timeout to match the remaining period
Expand Down Expand Up @@ -140,18 +136,22 @@ def __init__(self, client, async=False,
log.warning("Current implementation does not retry Failed messages")
log.warning("Use at your own risk! (or help improve with a PR!)")
self.queue = Queue() # Messages are sent through this queue
self.proc = Process(target=_send_upstream,
args=(self.queue,
self.client.copy(),
self.codec,
batch_send_every_t,
batch_send_every_n,
self.req_acks,
self.ack_timeout))

# Process will die if main thread exits
self.proc.daemon = True
self.proc.start()
self.thread_stop_event = Event()
self.thread = Thread(target=_send_upstream,
args=(self.queue,
self.client.copy(),
self.codec,
batch_send_every_t,
batch_send_every_n,
self.req_acks,
self.ack_timeout,
self.thread_stop_event))

# Thread will die if main thread exits
self.thread.daemon = True
self.thread.start()



def send_messages(self, topic, partition, *msg):
"""
Expand Down Expand Up @@ -208,7 +208,7 @@ def stop(self, timeout=1):
"""
if self.async:
self.queue.put((STOP_ASYNC_PRODUCER, None, None))
self.proc.join(timeout)
self.thread.join(timeout)

if self.proc.is_alive():
self.proc.terminate()
if self.thread.is_alive():
self.thread_stop_event.set()
44 changes: 44 additions & 0 deletions test/test_conn.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import socket
import struct
from threading import Thread

import mock
from . import unittest
Expand Down Expand Up @@ -162,3 +163,46 @@ def test_close__object_is_reusable(self):
self.conn.send(self.config['request_id'], self.config['payload'])
self.assertEqual(self.MockCreateConn.call_count, 1)
self.conn._sock.sendall.assert_called_with(self.config['payload'])


class TestKafkaConnection(unittest.TestCase):
    """Tests for KafkaConnection.copy() with socket.create_connection mocked out."""

    @mock.patch('socket.create_connection')
    def test_copy(self, mock_create_conn):
        """A copied KafkaConnection starts unconnected and can reinit()."""

        conn = KafkaConnection('kafka', 9092)
        # Constructing the original opens exactly one socket.
        self.assertEqual(mock_create_conn.call_count, 1)

        conn_copy = conn.copy()
        # Copying must not open a new socket; the copy carries the
        # endpoint but no live socket.
        self.assertEqual(mock_create_conn.call_count, 1)
        self.assertEqual(conn_copy.host, 'kafka')
        self.assertEqual(conn_copy.port, 9092)
        self.assertEqual(conn_copy._sock, None)

        conn_copy.reinit()
        # reinit() on the copy opens a second socket.
        self.assertEqual(mock_create_conn.call_count, 2)
        self.assertNotEqual(conn_copy._sock, None)

    @mock.patch('socket.create_connection')
    def test_copy_thread(self, mock_create_conn):
        """A copied KafkaConnection is usable from another thread."""

        results = []
        conn_copy = KafkaConnection('kafka', 9092).copy()

        def check_in_thread(results, conn_copy):
            # Collect a single outcome: None on success, the exception
            # otherwise, so the main thread can report assertion failures.
            try:
                self.assertEqual(conn_copy.host, 'kafka')
                self.assertEqual(conn_copy.port, 9092)
                self.assertNotEqual(conn_copy._sock, None)
            except Exception as e:
                results.append(e)
            else:
                results.append(None)

        worker = Thread(target=check_in_thread, args=(results, conn_copy))
        worker.start()
        worker.join()

        self.assertEqual(results, [None])
        self.assertEqual(mock_create_conn.call_count, 2)