From 29f21b3de3bfd0bcba3a89e16f5935fb1a505349 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 19:59:56 -0700 Subject: [PATCH 01/54] Workers need not register/unregister sock for every loop --- proxy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proxy.py b/proxy.py index b0e2a05a13..a27330fee7 100755 --- a/proxy.py +++ b/proxy.py @@ -855,11 +855,10 @@ def run(self) -> None: ) selector = selectors.DefaultSelector() try: + selector.register(sock, selectors.EVENT_READ) while self.running: with self.lock: - selector.register(sock, selectors.EVENT_READ) events = selector.select(timeout=1) - selector.unregister(sock) if len(events) == 0: continue try: @@ -876,6 +875,7 @@ def run(self) -> None: except KeyboardInterrupt: pass finally: + selector.unregister(sock) sock.close() From 9731bd0ca4425c8a4dfd9cd0834edbe8f6871188 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 20:11:32 -0700 Subject: [PATCH 02/54] No need for explicit socket.settimeout(0), which is the same as socket.setblocking(False) --- proxy.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/proxy.py b/proxy.py index a27330fee7..30500de9a5 100755 --- a/proxy.py +++ b/proxy.py @@ -790,7 +790,6 @@ def listen(self) -> None: self.socket.bind((str(self.hostname), self.port)) self.socket.listen(self.backlog) self.socket.setblocking(False) - self.socket.settimeout(0) logger.info('Listening on %s:%d' % (self.hostname, self.port)) def start_workers(self) -> None: @@ -863,9 +862,14 @@ def run(self) -> None: continue try: conn, addr = sock.accept() - except BlockingIOError: # as e: - # logger.exception('BlockingIOError', exc_info=e) + except BlockingIOError: continue + # Spawning new thread for each request is not a good strategy. + # This means to handle 1 million connections, server would need + # to spawn a million threads. + # + # In future we'll merge client selector with server selector, + # so that Worker is the only process per core to select for events. work = self.work_klass( fileno=conn.fileno(), addr=addr, From b3d81f532ae139eba7767d7651e3cfb74b23fe77 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 20:12:17 -0700 Subject: [PATCH 03/54] Remove settimeout assertion --- tests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests.py b/tests.py index 8111f0f718..6077be81f6 100644 --- a/tests.py +++ b/tests.py @@ -292,7 +292,6 @@ def test_setup_and_shutdown( sock.bind.assert_called_with((str(acceptor.hostname), acceptor.port)) sock.listen.assert_called_with(acceptor.backlog) sock.setblocking.assert_called_with(False) - sock.settimeout.assert_called_with(0) self.assertTrue(mock_pipe.call_count, num_workers) self.assertTrue(mock_worker.call_count, num_workers) From 637bfe3bbc4c3b70ab147eaa1f5cea46ccb6f1bb Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 20:18:35 -0700 Subject: [PATCH 04/54] Only store sender side of Pipe(). 
Also ensure both ends of the Pipe() are closed on shutdown --- proxy.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/proxy.py b/proxy.py index 30500de9a5..d35eb8ea3e 100755 --- a/proxy.py +++ b/proxy.py @@ -778,8 +778,7 @@ def __init__(self, self.current_worker_id = 0 self.num_workers = num_workers self.workers: List[Worker] = [] - self.work_queues: List[Tuple[connection.Connection, - connection.Connection]] = [] + self.work_queues: List[connection.Connection] = [] self.work_klass = work_klass self.kwargs = kwargs @@ -802,13 +801,15 @@ def start_workers(self) -> None: worker.start() self.workers.append(worker) - self.work_queues.append(work_queue) + self.work_queues.append(work_queue[0]) logger.info('Started %d workers' % self.num_workers) def shutdown(self) -> None: logger.info('Shutting down %d workers' % self.num_workers) for worker in self.workers: worker.join() + for work_queue in self.work_queues: + work_queue.close() def setup(self) -> None: """Listen on port, setup workers and pass server socket to workers.""" @@ -819,8 +820,8 @@ def setup(self) -> None: # Send server socket to workers. assert self.socket is not None for work_queue in self.work_queues: - work_queue[0].send(self.family) - send_handle(work_queue[0], self.socket.fileno(), + work_queue.send(self.family) + send_handle(work_queue, self.socket.fileno(), self.workers[self.current_worker_id].pid) self.socket.close() From 4bb94ee00086507f280aab1c2975128d81b73099 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 20:25:22 -0700 Subject: [PATCH 05/54] Make now() global. Also, we seem to be using datetime.utcnow and time.time for similar purposes --- proxy.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/proxy.py b/proxy.py index d35eb8ea3e..2dcd52c747 100755 --- a/proxy.py +++ b/proxy.py @@ -342,6 +342,10 @@ def new_socket_connection(addr: Tuple[str, int]) -> socket.socket: return conn +def now() -> datetime.datetime: + return datetime.datetime.utcnow() + + class socket_connection(contextlib.ContextDecorator): """Same as new_socket_connection but as a context manager and decorator.""" @@ -2087,7 +2091,7 @@ def __init__(self, fileno: int, addr: Tuple[str, int], self.fileno: int = fileno self.addr: Tuple[str, int] = addr - self.start_time: datetime.datetime = self.now() + self.start_time: datetime.datetime = now() self.last_activity: datetime.datetime = self.start_time self.config: ProtocolConfig = config if config else ProtocolConfig() @@ -2100,10 +2104,6 @@ def __init__(self, fileno: int, addr: Tuple[str, int], ) self.plugins: Dict[str, ProtocolHandlerPlugin] = {} - @staticmethod - def now() -> datetime.datetime: - return datetime.datetime.utcnow() - def initialize(self) -> None: """Optionally upgrades connection to HTTPS, set conn in non-blocking mode and initializes plugins.""" conn = self.optionally_wrap_socket(self.client.connection) @@ -2139,7 +2139,7 @@ def optionally_wrap_socket( return conn def connection_inactive_for(self) -> int: - return (self.now() - self.last_activity).seconds + return (now() - self.last_activity).seconds def is_connection_inactive(self) -> bool: return self.connection_inactive_for() > self.config.timeout def handle_writables(self, writables: List[Union[int, _HasFileno]]) -> bool: if 
self.client.buffer_size() > 0 and self.client.connection in writables: logger.debug('Client is ready for writes, flushing buffer') - self.last_activity = self.now() + self.last_activity = now() # Invoke plugin.on_response_chunk chunk = self.client.buffer @@ -2167,7 +2167,7 @@ def handle_writables(self, writables: List[Union[int, _HasFileno]]) -> bool: def handle_readables(self, readables: List[Union[int, _HasFileno]]) -> bool: if self.client.connection in readables: logger.debug('Client is ready for reads, reading') - self.last_activity = self.now() + self.last_activity = now() client_data = self.client.recv(self.config.client_recvbuf_size) if not client_data: @@ -2436,7 +2436,7 @@ def on_client_connection_close(self) -> None: pass def request_will_be_sent(self) -> Dict[str, Any]: - now = time.time() + _now = time.time() return { 'requestId': self.id, 'loaderId': self.loader_id, @@ -2456,8 +2456,8 @@ def request_will_be_sent(self) -> Dict[str, Any]: 'postData': None if self.request.method != 'POST' else text_(self.request.body) }, - 'timestamp': now - PROXY_PY_START_TIME, - 'wallTime': now, + 'timestamp': _now - PROXY_PY_START_TIME, + 'wallTime': _now, 'initiator': { 'type': 'other' }, From 118e23a6897e38c27ae67375289344c6a1b4589b Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 21:12:17 -0700 Subject: [PATCH 06/54] Use time.time throughout. Remove incomplete test_cache_responses_plugin to avoid resource leak in tests --- proxy.py | 19 ++++++++----------- tests.py | 10 ---------- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/proxy.py b/proxy.py index 2dcd52c747..eba6b63e51 100755 --- a/proxy.py +++ b/proxy.py @@ -342,10 +342,6 @@ def new_socket_connection(addr: Tuple[str, int]) -> socket.socket: return conn -def now() -> datetime.datetime: - return datetime.datetime.utcnow() - - class socket_connection(contextlib.ContextDecorator): """Same as new_socket_connection but as a context manager and decorator.""" @@ -2091,8 +2087,8 @@ def __init__(self, fileno: int, addr: Tuple[str, int], self.fileno: int = fileno self.addr: Tuple[str, int] = addr - self.start_time: datetime.datetime = now() - self.last_activity: datetime.datetime = self.start_time + self.start_time: float = time.time() + self.last_activity: float = self.start_time self.config: ProtocolConfig = config if config else ProtocolConfig() self.request: HttpParser = HttpParser(httpParserTypes.REQUEST_PARSER) @@ -2138,8 +2134,8 @@ def optionally_wrap_socket( conn = ctx.wrap_socket(conn, server_side=True) return conn - def connection_inactive_for(self) -> int: - return (now() - self.last_activity).seconds + def connection_inactive_for(self) -> float: + return time.time() - self.last_activity def is_connection_inactive(self) -> bool: return self.connection_inactive_for() > self.config.timeout @@ -2147,7 +2143,7 @@ def is_connection_inactive(self) -> bool: def handle_writables(self, writables: List[Union[int, _HasFileno]]) -> bool: if self.client.buffer_size() > 0 and self.client.connection in writables: logger.debug('Client is ready for writes, flushing buffer') - self.last_activity = now() + self.last_activity = time.time() # Invoke plugin.on_response_chunk chunk = self.client.buffer @@ -2167,7 +2163,7 @@ def handle_writables(self, writables: List[Union[int, _HasFileno]]) -> bool: def handle_readables(self, readables: List[Union[int, _HasFileno]]) -> bool: if self.client.connection in readables: logger.debug('Client is ready for reads, reading') - self.last_activity = now() + self.last_activity = 
time.time() client_data = self.client.recv(self.config.client_recvbuf_size) if not client_data: @@ -2802,7 +2798,8 @@ def main(input_args: List[str]) -> None: if (args.cert_file and args.key_file) and \ (args.ca_key_file and args.ca_cert_file and args.ca_signing_key_file): - print('HTTPS interception not supported when proxy.py is serving over HTTPS') + print('You can either enable end-to-end encryption OR TLS interception,' + 'not both together.') sys.exit(0) try: diff --git a/tests.py b/tests.py index 6077be81f6..7014df37b6 100644 --- a/tests.py +++ b/tests.py @@ -1742,11 +1742,6 @@ def test_filter_by_upstream_host_plugin( ) ) - @mock.patch('proxy.TcpServerConnection') - def test_cache_responses_plugin( - self, mock_server_conn: mock.Mock) -> None: - pass - @mock.patch('proxy.TcpServerConnection') def test_man_in_the_middle_plugin( self, mock_server_conn: mock.Mock) -> None: @@ -2081,11 +2076,6 @@ def test_modify_post_data_plugin(self) -> None: ) ) - @mock.patch('proxy.TcpServerConnection') - def test_cache_responses_plugin( - self, mock_server_conn: mock.Mock) -> None: - pass - @mock.patch('proxy.TcpServerConnection') def test_man_in_the_middle_plugin( self, mock_server_conn: mock.Mock) -> None: From f5d0612cb78537d7ea09b6b4942f5a3c4d55d90e Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 21:13:03 -0700 Subject: [PATCH 07/54] Remove unused --- proxy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/proxy.py b/proxy.py index eba6b63e51..a81f255706 100755 --- a/proxy.py +++ b/proxy.py @@ -11,7 +11,6 @@ import argparse import base64 import contextlib -import datetime import errno import functools import hashlib From 51084dd2b845091fe8d09decb43df533fdb8bfdb Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 21:28:41 -0700 Subject: [PATCH 08/54] Wrap selector register/unregister within a context manager --- proxy.py | 102 +++++++++++++++++++++++++++---------------------------- 1 file changed, 50 insertions(+), 52 deletions(-) diff --git a/proxy.py b/proxy.py index a81f255706..375311f4fd 100755 --- a/proxy.py +++ b/proxy.py @@ -2139,6 +2139,21 @@ def connection_inactive_for(self) -> float: def is_connection_inactive(self) -> bool: return self.connection_inactive_for() > self.config.timeout + def flush(self) -> None: + if not self.client.has_buffer(): + return + try: + self.selector.register(self.client.connection, selectors.EVENT_WRITE) + while self.client.has_buffer(): + ev: List[Tuple[selectors.SelectorKey, int]] = self.selector.select(timeout=1) + if len(ev) == 0: + continue + self.client.flush() + except BrokenPipeError: + pass + finally: + self.selector.unregister(self.client.connection) + def handle_writables(self, writables: List[Union[int, _HasFileno]]) -> bool: if self.client.buffer_size() > 0 and self.client.connection in writables: logger.debug('Client is ready for writes, flushing buffer') @@ -2212,29 +2227,6 @@ def handle_readables(self, readables: List[Union[int, _HasFileno]]) -> bool: return True return False - def get_events(self) -> Dict[socket.socket, int]: - events: Dict[socket.socket, int] = { - self.client.connection: selectors.EVENT_READ - } - if self.client.has_buffer(): - events[self.client.connection] |= selectors.EVENT_WRITE - - # ProtocolHandlerPlugin.get_descriptors - for plugin in self.plugins.values(): - plugin_read_desc, plugin_write_desc = plugin.get_descriptors() - for r in plugin_read_desc: - if r not in events: - events[r] = selectors.EVENT_READ - else: - events[r] |= selectors.EVENT_READ - for w in plugin_write_desc: - 
if w not in events: - events[w] = selectors.EVENT_WRITE - else: - events[w] |= selectors.EVENT_WRITE - - return events - def handle_events(self, readables: List[Union[int, _HasFileno]], writables: List[Union[int, _HasFileno]]) -> bool: """Returns True if proxy must teardown.""" # Flush buffer for ready to write sockets @@ -2276,46 +2268,52 @@ def handle_events(self, readables: List[Union[int, _HasFileno]], writables: List return False - def run_once(self) -> bool: + def get_events(self) -> Dict[socket.socket, int]: + events: Dict[socket.socket, int] = { + self.client.connection: selectors.EVENT_READ + } + if self.client.has_buffer(): + events[self.client.connection] |= selectors.EVENT_WRITE + + # ProtocolHandlerPlugin.get_descriptors + for plugin in self.plugins.values(): + plugin_read_desc, plugin_write_desc = plugin.get_descriptors() + for r in plugin_read_desc: + if r not in events: + events[r] = selectors.EVENT_READ + else: + events[r] |= selectors.EVENT_READ + for w in plugin_write_desc: + if w not in events: + events[w] = selectors.EVENT_WRITE + else: + events[w] |= selectors.EVENT_WRITE + + return events + + @contextlib.contextmanager + def selected_events(self) -> List[Tuple[selectors.SelectorKey, int]]: events = self.get_events() for fd in events: self.selector.register(fd, events[fd]) + yield self.selector.select(timeout=1) + for fd in events.keys(): + self.selector.unregister(fd) - # Select - e: List[Tuple[selectors.SelectorKey, int]] = self.selector.select(timeout=1) + def run_once(self) -> bool: readables = [] writables = [] - for key, mask in e: - if mask & selectors.EVENT_READ: - readables.append(key.fileobj) - if mask & selectors.EVENT_WRITE: - writables.append(key.fileobj) - + with self.selected_events() as events: + for key, mask in events: + if mask & selectors.EVENT_READ: + readables.append(key.fileobj) + if mask & selectors.EVENT_WRITE: + writables.append(key.fileobj) teardown = self.handle_events(readables, writables) - - # Unregister - for fd in events.keys(): - self.selector.unregister(fd) - if teardown: return True return False - def flush(self) -> None: - if not self.client.has_buffer(): - return - try: - self.selector.register(self.client.connection, selectors.EVENT_WRITE) - while self.client.has_buffer(): - ev: List[Tuple[selectors.SelectorKey, int]] = self.selector.select(timeout=1) - if len(ev) == 0: - continue - self.client.flush() - except BrokenPipeError: - pass - finally: - self.selector.unregister(self.client.connection) - def run(self) -> None: try: self.initialize() From f8cc85c6ad508a165c73fcb93d4b3ff314c84c07 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 21:40:27 -0700 Subject: [PATCH 09/54] Refactor in preparation of threadless request handling --- proxy.py | 57 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/proxy.py b/proxy.py index 375311f4fd..e59e71966f 100755 --- a/proxy.py +++ b/proxy.py @@ -2300,25 +2300,27 @@ def selected_events(self) -> List[Tuple[selectors.SelectorKey, int]]: for fd in events.keys(): self.selector.unregister(fd) - def run_once(self) -> bool: + def handle_selected_events(self, events: List[Tuple[selectors.SelectorKey, int]]) -> bool: readables = [] writables = [] - with self.selected_events() as events: - for key, mask in events: - if mask & selectors.EVENT_READ: - readables.append(key.fileobj) - if mask & selectors.EVENT_WRITE: - writables.append(key.fileobj) + for key, mask in events: + if mask & selectors.EVENT_READ: + 
readables.append(key.fileobj) + if mask & selectors.EVENT_WRITE: + writables.append(key.fileobj) teardown = self.handle_events(readables, writables) if teardown: return True return False + def run_once(self) -> bool: + with self.selected_events() as events: + return self.handle_selected_events(events) + def run(self) -> None: try: self.initialize() logger.debug('Handling connection %r' % self.client.connection) - while True: teardown = self.run_once() if teardown: @@ -2332,27 +2334,30 @@ def run(self) -> None: 'Exception while handling connection %r' % self.client.connection, exc_info=e) finally: - # Flush pending buffer if any - self.flush() + self.shutdown() - # Invoke plugin.on_client_connection_close - for plugin in self.plugins.values(): - plugin.on_client_connection_close() + def shutdown(self) -> None: + # Flush pending buffer if any + self.flush() - logger.debug( - 'Closing proxy for connection %r ' - 'at address %r with pending client buffer size %d bytes' % - (self.client.connection, self.client.addr, self.client.buffer_size())) + # Invoke plugin.on_client_connection_close + for plugin in self.plugins.values(): + plugin.on_client_connection_close() - if not self.client.closed: - try: - self.client.connection.shutdown(socket.SHUT_WR) - logger.debug('Client connection shutdown successful') - except OSError: - pass - finally: - self.client.connection.close() - logger.debug('Client connection closed') + logger.debug( + 'Closing proxy for connection %r ' + 'at address %r with pending client buffer size %d bytes' % + (self.client.connection, self.client.addr, self.client.buffer_size())) + + if not self.client.closed: + try: + self.client.connection.shutdown(socket.SHUT_WR) + logger.debug('Client connection shutdown successful') + except OSError: + pass + finally: + self.client.connection.close() + logger.debug('Client connection closed') class DevtoolsProtocolPlugin(ProtocolHandlerPlugin): From 0da94d84629f3bf4d72edb2f1c5612ff2a2ae4af Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 21:43:22 -0700 Subject: [PATCH 10/54] MyPy generator fix --- proxy.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/proxy.py b/proxy.py index e59e71966f..3484c8bb2b 100755 --- a/proxy.py +++ b/proxy.py @@ -38,7 +38,8 @@ from multiprocessing import connection from multiprocessing.reduction import send_handle, recv_handle from types import TracebackType -from typing import Any, Dict, List, Tuple, Optional, Union, NamedTuple, Callable, TYPE_CHECKING, Type, cast +from typing import Any, Dict, List, Tuple, Optional, Union, NamedTuple, Callable, Type +from typing import cast, Generator, TYPE_CHECKING from urllib import parse as urlparse from typing_extensions import Protocol @@ -2292,7 +2293,7 @@ def get_events(self) -> Dict[socket.socket, int]: return events @contextlib.contextmanager - def selected_events(self) -> List[Tuple[selectors.SelectorKey, int]]: + def selected_events(self) -> Generator[List[Tuple[selectors.SelectorKey, int]], None, None]: events = self.get_events() for fd in events: self.selector.register(fd, events[fd]) From 7b970e963d155a7e3ca627fd4435b6a4e5671036 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 21:58:36 -0700 Subject: [PATCH 11/54] Add --threadless flag --- README.md | 6 ++++-- proxy.py | 29 +++++++++++++++++++++++++---- tests.py | 8 +++++++- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index ea57228e7a..7bc6a1d53d 100644 --- a/README.md +++ b/README.md @@ -905,8 +905,8 @@ usage: 
proxy.py [-h] [--backlog BACKLOG] [--basic-auth BASIC_AUTH] [--pac-file-url-path PAC_FILE_URL_PATH] [--pid-file PID_FILE] [--plugins PLUGINS] [--port PORT] [--server-recvbuf-size SERVER_RECVBUF_SIZE] - [--static-server-dir STATIC_SERVER_DIR] [--timeout TIMEOUT] - [--version] + [--static-server-dir STATIC_SERVER_DIR] [--threadless] + [--timeout TIMEOUT] [--version] proxy.py v1.2.0 @@ -995,6 +995,8 @@ optional arguments: server root directory. This option is only applicable when static server is also enabled. See --enable- static-server. + --threadless Default: False. When disabled a new thread is spawned + to handle each client connection. --timeout TIMEOUT Default: 10. Number of seconds after which an inactive connection must be dropped. Inactivity is defined by no data sent or received by the client. diff --git a/proxy.py b/proxy.py index 3484c8bb2b..e9d5c67f13 100755 --- a/proxy.py +++ b/proxy.py @@ -90,6 +90,7 @@ DEFAULT_PORT = 8899 DEFAULT_SERVER_RECVBUF_SIZE = DEFAULT_BUFFER_SIZE DEFAULT_STATIC_SERVER_DIR = os.path.join(PROXY_PY_DIR, 'public') +DEFAULT_THREADLESS = False DEFAULT_TIMEOUT = 10 DEFAULT_VERSION = False UNDER_TEST = False # Set to True if under test @@ -765,7 +766,10 @@ def __init__(self, hostname: Union[ipaddress.IPv4Address, ipaddress.IPv6Address], port: int, backlog: int, num_workers: int, - work_klass: type, **kwargs: Any) -> None: + threadless: bool, + work_klass: type, + **kwargs: Any) -> None: + self.threadless = threadless self.running: bool = False self.hostname: Union[ipaddress.IPv4Address, @@ -796,7 +800,12 @@ def start_workers(self) -> None: for worker_id in range(self.num_workers): work_queue = multiprocessing.Pipe() - worker = Worker(work_queue[1], self.work_klass, **self.kwargs) + worker = Worker( + self.threadless, + work_queue[1], + self.work_klass, + **self.kwargs + ) worker.daemon = True worker.start() @@ -837,6 +846,7 @@ class Worker(multiprocessing.Process): def __init__( self, + threadless: bool, work_queue: connection.Connection, work_klass: type, **kwargs: Any): @@ -1007,7 +1017,9 @@ def __init__( enable_static_server: bool = DEFAULT_ENABLE_STATIC_SERVER, devtools_event_queue: Optional[DevtoolsEventQueueType] = None, devtools_ws_path: bytes = DEFAULT_DEVTOOLS_WS_PATH, - timeout: int = DEFAULT_TIMEOUT) -> None: + timeout: int = DEFAULT_TIMEOUT, + threadless: bool = DEFAULT_THREADLESS) -> None: + self.threadless = threadless self.timeout = timeout self.auth_code = auth_code self.server_recvbuf_size = server_recvbuf_size @@ -2764,6 +2776,13 @@ def init_parser() -> argparse.ArgumentParser: 'This option is only applicable when static server is also enabled. ' 'See --enable-static-server.' ) + parser.add_argument( + '--threadless', + action='store_true', + default=DEFAULT_THREADLESS, + help='Default: False. When disabled a new thread is spawned ' + 'to handle each client connection.' 
+ ) parser.add_argument( '--timeout', type=int, @@ -2854,7 +2873,8 @@ def main(input_args: List[str]) -> None: enable_static_server=args.enable_static_server, devtools_event_queue=devtools_event_queue, devtools_ws_path=args.devtools_ws_path, - timeout=args.timeout) + timeout=args.timeout, + threadless=args.threadless) config.plugins = load_plugins( bytes_( @@ -2866,6 +2886,7 @@ def main(input_args: List[str]) -> None: port=config.port, backlog=config.backlog, num_workers=config.num_workers, + threadless=config.threadless, work_klass=ProtocolHandler, config=config) if args.pid_file: diff --git a/tests.py b/tests.py index 7014df37b6..39f9509438 100644 --- a/tests.py +++ b/tests.py @@ -278,6 +278,7 @@ def test_setup_and_shutdown( proxy.DEFAULT_PORT, proxy.DEFAULT_BACKLOG, num_workers, + threadless=proxy.DEFAULT_THREADLESS, work_klass=work_klass, **kwargs ) @@ -314,6 +315,7 @@ def setUp(self, mock_protocol_handler: mock.Mock) -> None: self.pipe = multiprocessing.Pipe() self.protocol_config = proxy.ProtocolConfig() self.worker = proxy.Worker( + proxy.DEFAULT_THREADLESS, self.pipe[1], mock_protocol_handler, config=self.protocol_config) @@ -2169,6 +2171,7 @@ def mock_default_args(mock_args: mock.Mock) -> None: mock_args.devtools_event_queue = None mock_args.devtools_ws_path = proxy.DEFAULT_DEVTOOLS_WS_PATH mock_args.timeout = proxy.DEFAULT_TIMEOUT + mock_args.threadless = proxy.DEFAULT_THREADLESS @mock.patch('time.sleep') @mock.patch('proxy.load_plugins') @@ -2225,7 +2228,8 @@ def test_init_with_no_arguments( enable_static_server=mock_args.enable_static_server, devtools_event_queue=None, devtools_ws_path=proxy.DEFAULT_DEVTOOLS_WS_PATH, - timeout=proxy.DEFAULT_TIMEOUT + timeout=proxy.DEFAULT_TIMEOUT, + threadless=proxy.DEFAULT_THREADLESS, ) mock_acceptor_pool.assert_called_with( @@ -2234,6 +2238,7 @@ def test_init_with_no_arguments( backlog=mock_protocol_config.return_value.backlog, num_workers=mock_protocol_config.return_value.num_workers, work_klass=proxy.ProtocolHandler, + threadless=mock_protocol_config.return_value.threadless, config=mock_protocol_config.return_value, ) mock_acceptor_pool.return_value.setup.assert_called() @@ -2286,6 +2291,7 @@ def test_basic_auth( backlog=config.backlog, num_workers=config.num_workers, work_klass=proxy.ProtocolHandler, + threadless=config.threadless, config=config) self.assertEqual(mock_protocol_config.call_args[1]['auth_code'], b'Basic dXNlcjpwYXNz') From 4811c19f90abd76ad4d9ab13d703424d1c4df9db Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 22:07:21 -0700 Subject: [PATCH 12/54] Internally call them acceptors --- README.md | 8 ++++---- proxy.py | 20 +++++++++----------- tests.py | 4 ++-- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 7bc6a1d53d..781491499b 100644 --- a/README.md +++ b/README.md @@ -710,14 +710,14 @@ mechanism. Its responsibility is to establish connection between client and upstream [TcpServerConnection](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L204-L227) and invoke `HttpProxyBasePlugin` lifecycle hooks. -- `ProtocolHandler` threads are started by [Worker](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L424-L472) +- `ProtocolHandler` threads are started by [Acceptor](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L424-L472) processes. 
-- `--num-workers` `Worker` processes are started by +- `--num-workers` `Acceptor` processes are started by [AcceptorPool](https://github.com/abhinavsingh/proxy.py/blob/b03629fa0df1595eb4995427bc601063be7fdca9/proxy.py#L368-L421) on start-up. -- `AcceptorPool` listens on server socket and pass the handler to `Worker` processes. +- `AcceptorPool` listens on server socket and pass the handler to `Acceptor` processes. Workers are responsible for accepting new client connections and starting `ProtocolHandler` thread. @@ -797,7 +797,7 @@ CLASSES contextlib.ContextDecorator(builtins.object) socket_connection multiprocessing.context.Process(multiprocessing.process.BaseProcess) - Worker + Acceptor threading.Thread(builtins.object) ProtocolHandler ``` diff --git a/proxy.py b/proxy.py index e9d5c67f13..121569acee 100755 --- a/proxy.py +++ b/proxy.py @@ -781,7 +781,7 @@ def __init__(self, self.current_worker_id = 0 self.num_workers = num_workers - self.workers: List[Worker] = [] + self.workers: List[Acceptor] = [] self.work_queues: List[connection.Connection] = [] self.work_klass = work_klass @@ -800,7 +800,7 @@ def start_workers(self) -> None: for worker_id in range(self.num_workers): work_queue = multiprocessing.Pipe() - worker = Worker( + worker = Acceptor( self.threadless, work_queue[1], self.work_klass, @@ -835,7 +835,7 @@ def setup(self) -> None: self.socket.close() -class Worker(multiprocessing.Process): +class Acceptor(multiprocessing.Process): """Socket client acceptor. Accepts client connection over received server socket handle and @@ -851,6 +851,7 @@ def __init__( work_klass: type, **kwargs: Any): super().__init__() + self.threadless: bool = threadless self.work_queue: connection.Connection = work_queue self.work_klass = work_klass self.kwargs = kwargs @@ -875,18 +876,15 @@ def run(self) -> None: conn, addr = sock.accept() except BlockingIOError: continue - # Spawning new thread for each request is not a good strategy. - # This means to handle 1 million connections, server would need - # to spawn a million threads. - # - # In future we'll merge client selector with server selector, - # so that Worker is the only process per core to select for events. 
work = self.work_klass( fileno=conn.fileno(), addr=addr, **self.kwargs) - work.setDaemon(True) - work.start() + if self.threadless: + pass + else: + work.setDaemon(True) + work.start() except KeyboardInterrupt: pass finally: diff --git a/tests.py b/tests.py index 39f9509438..70d78962ed 100644 --- a/tests.py +++ b/tests.py @@ -258,7 +258,7 @@ class TestAcceptorPool(unittest.TestCase): @mock.patch('proxy.send_handle') @mock.patch('multiprocessing.Pipe') @mock.patch('socket.socket') - @mock.patch('proxy.Worker') + @mock.patch('proxy.Acceptor') def test_setup_and_shutdown( self, mock_worker: mock.Mock, @@ -314,7 +314,7 @@ class TestWorker(unittest.TestCase): def setUp(self, mock_protocol_handler: mock.Mock) -> None: self.pipe = multiprocessing.Pipe() self.protocol_config = proxy.ProtocolConfig() - self.worker = proxy.Worker( + self.worker = proxy.Acceptor( proxy.DEFAULT_THREADLESS, self.pipe[1], mock_protocol_handler, From 0e71deb7f909b8ca1bf999a16914594d585868cc Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 22:15:19 -0700 Subject: [PATCH 13/54] Internally use acceptors --- proxy.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/proxy.py b/proxy.py index 121569acee..f069c3da3d 100755 --- a/proxy.py +++ b/proxy.py @@ -779,9 +779,8 @@ def __init__(self, self.backlog: int = backlog self.socket: Optional[socket.socket] = None - self.current_worker_id = 0 - self.num_workers = num_workers - self.workers: List[Acceptor] = [] + self.num_acceptors = num_workers + self.acceptors: List[Acceptor] = [] self.work_queues: List[connection.Connection] = [] self.work_klass = work_klass @@ -797,26 +796,24 @@ def listen(self) -> None: def start_workers(self) -> None: """Start worker processes.""" - for worker_id in range(self.num_workers): + for _ in range(self.num_acceptors): work_queue = multiprocessing.Pipe() - - worker = Acceptor( + acceptor = Acceptor( self.threadless, work_queue[1], self.work_klass, **self.kwargs ) - worker.daemon = True - worker.start() - - self.workers.append(worker) + acceptor.daemon = True + acceptor.start() + self.acceptors.append(acceptor) self.work_queues.append(work_queue[0]) - logger.info('Started %d workers' % self.num_workers) + logger.info('Started %d workers' % self.num_acceptors) def shutdown(self) -> None: - logger.info('Shutting down %d workers' % self.num_workers) - for worker in self.workers: - worker.join() + logger.info('Shutting down %d workers' % self.num_acceptors) + for acceptor in self.acceptors: + acceptor.join() for work_queue in self.work_queues: work_queue.close() @@ -826,12 +823,13 @@ def setup(self) -> None: self.listen() self.start_workers() - # Send server socket to workers. + # Send server socket to all acceptor processes. assert self.socket is not None - for work_queue in self.work_queues: + for index in range(self.num_acceptors): + work_queue = self.work_queues[index] work_queue.send(self.family) send_handle(work_queue, self.socket.fileno(), - self.workers[self.current_worker_id].pid) + self.acceptors[index].pid) self.socket.close() From 4efe2afc6cd82e134425849360d0ce2ba14b06a0 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 13 Oct 2019 23:28:14 -0700 Subject: [PATCH 14/54] Add Threadless class. Also no need to pass family over pipe to acceptors. 
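The handoff relies on multiprocessing.reduction: the Acceptor first sends the peer address over the pipe, then ships a duplicate of the accepted socket's file descriptor to the Threadless process, which rebuilds a socket object from it. A minimal standalone sketch of that primitive (hypothetical names and port, not proxy.py's code):

```python
import multiprocessing
import socket
from multiprocessing import connection
from multiprocessing.reduction import send_handle, recv_handle

def worker(queue: connection.Connection) -> None:
    addr = queue.recv()              # peer address arrives first
    fileno = recv_handle(queue)      # then the duplicated socket fd
    conn = socket.fromfd(fileno, socket.AF_INET, socket.SOCK_STREAM)
    conn.sendall(b'hello\r\n')
    conn.close()

if __name__ == '__main__':
    receiver, sender = multiprocessing.Pipe()
    proc = multiprocessing.Process(target=worker, args=(receiver,))
    proc.start()
    sock = socket.socket()
    sock.bind(('127.0.0.1', 9999))
    sock.listen(1)
    peer = socket.create_connection(('127.0.0.1', 9999))
    client, addr = sock.accept()
    sender.send(addr)
    send_handle(sender, client.fileno(), proc.pid)
    client.close()                   # parent's duplicate is no longer needed
    print(peer.recv(16))             # b'hello\r\n', served by the child
    proc.join()
```

Note that the receiving side must already know the address family, which is why it is now passed to Acceptor's constructor instead of over the pipe.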
--- proxy.py | 187 ++++++++++++++++++++++++++++++++++++++++++------------- tests.py | 31 +++++---- 2 files changed, 158 insertions(+), 60 deletions(-) diff --git a/proxy.py b/proxy.py index f069c3da3d..90838b6e6c 100755 --- a/proxy.py +++ b/proxy.py @@ -799,6 +799,7 @@ def start_workers(self) -> None: for _ in range(self.num_acceptors): work_queue = multiprocessing.Pipe() acceptor = Acceptor( + self.family, self.threadless, work_queue[1], self.work_klass, @@ -826,13 +827,66 @@ def setup(self) -> None: # Send server socket to all acceptor processes. assert self.socket is not None for index in range(self.num_acceptors): - work_queue = self.work_queues[index] - work_queue.send(self.family) - send_handle(work_queue, self.socket.fileno(), - self.acceptors[index].pid) + send_handle( + self.work_queues[index], + self.socket.fileno(), + self.acceptors[index].pid + ) self.socket.close() +class Threadless(multiprocessing.Process): + """Threadless handles lifecycle of multiple ProtocolHandler instances. + + When --threadless option is enabled, each Acceptor process also + spawns one Threadless process. Then instead of spawning a new thread + for each accepted client connection, Acceptor process simply sends + accepted client connection to Threadless process over a pipe. + """ + + def __init__( + self, + client_queue: connection.Connection, + work_klass: type, + **kwargs: Any) -> None: + super().__init__() + self.client_queue = client_queue + self.work_klass = work_klass + self.kwargs = kwargs + + # TODO: Create an abstract class that ProtocolHandler will implement. + # For now hardcode type as ProtocolHandler + self.clients: Dict[int, ProtocolHandler] = {} + self.selector = selectors.DefaultSelector() + + def run(self) -> None: + try: + while True: + # TODO: Recv via selector to make non-blocking + addr = self.client_queue.recv() + fileno = recv_handle(self.client_queue) + + work = self.work_klass( + fileno=fileno, + addr=addr, + **self.kwargs) + self.clients[fileno] = work + + events: Dict[socket.socket, int] = work.get_events() + for fd in events: + self.selector.register(fd, events[fd]) + + _ = self.selector.select(timeout=1) + + work.handle_selected_events(events) + for fd in events.keys(): + self.selector.unregister(fd) + except KeyboardInterrupt: + pass + finally: + pass + + class Acceptor(multiprocessing.Process): """Socket client acceptor. 
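The Threadless.run() loop above still blocks on client_queue.recv(), hence the TODO about receiving via the selector. A multiprocessing Connection exposes fileno(), so it can be registered with a selector like any socket; the next patch does exactly that for client_queue. A standalone sketch of the primitive (illustrative only, not proxy.py's code):

```python
import multiprocessing
import selectors

receiver, sender = multiprocessing.Pipe(duplex=False)
selector = selectors.DefaultSelector()
# Connection objects implement fileno(), so selectors accept them directly.
selector.register(receiver, selectors.EVENT_READ)
sender.send('ping')
for key, mask in selector.select(timeout=1):
    if key.fileobj is receiver:
        print(receiver.recv())  # 'ping'
```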
@@ -844,51 +898,97 @@ class Acceptor(multiprocessing.Process): def __init__( self, + family: int, threadless: bool, work_queue: connection.Connection, work_klass: type, - **kwargs: Any): + **kwargs: Any) -> None: super().__init__() + self.family: int = family self.threadless: bool = threadless self.work_queue: connection.Connection = work_queue self.work_klass = work_klass self.kwargs = kwargs - self.running = True + + self.running = False + self.selector = selectors.DefaultSelector() + self.sock: Optional[socket.socket] = None + self.threadless_process: Optional[multiprocessing.Process] = None + self.threadless_client_queue: Optional[connection.Connection] = None + + def start_threadless_process(self) -> None: + if not self.threadless: + return + pipe = multiprocessing.Pipe() + self.threadless_client_queue = pipe[0] + self.threadless_process = Threadless( + pipe[1], self.work_klass, **self.kwargs + ) + self.threadless_process.daemon = True + self.threadless_process.start() + + def shutdown_threadless_process(self) -> None: + if not self.threadless: + return + assert self.threadless_process and self.threadless_client_queue + self.threadless_process.join() + self.threadless_client_queue.close() + + def run_once(self) -> bool: + with self.lock: + events = self.selector.select(timeout=1) + if len(events) == 0: + return False + try: + assert self.sock + conn, addr = self.sock.accept() + except BlockingIOError: + return False + if self.threadless and \ + self.threadless_client_queue and \ + self.threadless_process: + self.threadless_client_queue.send(addr) + send_handle( + self.threadless_client_queue, + conn.fileno(), + self.threadless_process.pid + ) + conn.close() + else: + # Starting a new thread per client request simply means + # we need 1 million threads to handle a million concurrent + # connections. Since most of the client requests are short + # lived (even with keep-alive), starting threads is excessive. 
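+                #
+                # Threadless mode (the branch above) sidesteps this by handing
+                # the accepted socket to a single shared event-loop process
+                # instead.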
+ work = self.work_klass( + fileno=conn.fileno(), + addr=addr, + **self.kwargs) + work.setDaemon(True) + work.start() + return False def run(self) -> None: - family = self.work_queue.recv() - sock = socket.fromfd( + self.running = True + self.sock = socket.fromfd( recv_handle(self.work_queue), - family=family, + family=self.family, type=socket.SOCK_STREAM ) - selector = selectors.DefaultSelector() try: - selector.register(sock, selectors.EVENT_READ) + self.start_threadless_process() + self.selector.register(self.sock, selectors.EVENT_READ) while self.running: - with self.lock: - events = selector.select(timeout=1) - if len(events) == 0: - continue - try: - conn, addr = sock.accept() - except BlockingIOError: - continue - work = self.work_klass( - fileno=conn.fileno(), - addr=addr, - **self.kwargs) - if self.threadless: - pass - else: - work.setDaemon(True) - work.start() + teardown = self.run_once() + if teardown: + break except KeyboardInterrupt: pass finally: - selector.unregister(sock) - sock.close() + self.selector.unregister(self.sock) + self.shutdown_threadless_process() + self.sock.close() self.work_queue.close() + self.running = False class ProtocolException(Exception): @@ -2301,30 +2401,31 @@ def get_events(self) -> Dict[socket.socket, int]: return events @contextlib.contextmanager - def selected_events(self) -> Generator[List[Tuple[selectors.SelectorKey, int]], None, None]: + def selected_events(self) -> \ + Generator[Tuple[List[Union[int, _HasFileno]], + List[Union[int, _HasFileno]]], + None, None]: events = self.get_events() for fd in events: self.selector.register(fd, events[fd]) - yield self.selector.select(timeout=1) - for fd in events.keys(): - self.selector.unregister(fd) - - def handle_selected_events(self, events: List[Tuple[selectors.SelectorKey, int]]) -> bool: + ev = self.selector.select(timeout=1) readables = [] writables = [] - for key, mask in events: + for key, mask in ev: if mask & selectors.EVENT_READ: readables.append(key.fileobj) if mask & selectors.EVENT_WRITE: writables.append(key.fileobj) - teardown = self.handle_events(readables, writables) - if teardown: - return True - return False + yield (readables, writables) + for fd in events.keys(): + self.selector.unregister(fd) def run_once(self) -> bool: - with self.selected_events() as events: - return self.handle_selected_events(events) + with self.selected_events() as (readables, writables): + teardown = self.handle_events(readables, writables) + if teardown: + return True + return False def run(self) -> None: try: diff --git a/tests.py b/tests.py index 70d78962ed..9f42c5267b 100644 --- a/tests.py +++ b/tests.py @@ -310,25 +310,29 @@ def test_setup_and_shutdown( class TestWorker(unittest.TestCase): + @mock.patch('selectors.DefaultSelector') @mock.patch('proxy.ProtocolHandler') - def setUp(self, mock_protocol_handler: mock.Mock) -> None: + def setUp( + self, + mock_protocol_handler: mock.Mock, + mock_selector: mock.Mock) -> None: + self.mock_protocol_handler = mock_protocol_handler + self.mock_selector = mock_selector self.pipe = multiprocessing.Pipe() self.protocol_config = proxy.ProtocolConfig() self.worker = proxy.Acceptor( + socket.AF_INET6, proxy.DEFAULT_THREADLESS, self.pipe[1], mock_protocol_handler, config=self.protocol_config) - self.mock_protocol_handler = mock_protocol_handler - @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') @mock.patch('proxy.recv_handle') def test_continues_when_no_events( self, mock_recv_handle: mock.Mock, - mock_fromfd: mock.Mock, - mock_selector: 
mock.Mock) -> None: + mock_fromfd: mock.Mock) -> None: fileno = 10 conn = mock.MagicMock() addr = mock.MagicMock() @@ -336,23 +340,20 @@ def test_continues_when_no_events( mock_fromfd.return_value.accept.return_value = (conn, addr) mock_recv_handle.return_value = fileno - selector = mock_selector.return_value + selector = self.mock_selector.return_value selector.select.side_effect = [[], KeyboardInterrupt()] - self.pipe[0].send(socket.AF_INET6) self.worker.run() sock.accept.assert_not_called() self.mock_protocol_handler.assert_not_called() - @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') @mock.patch('proxy.recv_handle') def test_worker_doesnt_teardown_on_blocking_io_error( self, mock_recv_handle: mock.Mock, - mock_fromfd: mock.Mock, - mock_selector: mock.Mock) -> None: + mock_fromfd: mock.Mock) -> None: fileno = 10 conn = mock.MagicMock() addr = mock.MagicMock() @@ -360,23 +361,20 @@ def test_worker_doesnt_teardown_on_blocking_io_error( mock_fromfd.return_value.accept.return_value = (conn, addr) mock_recv_handle.return_value = fileno - selector = mock_selector.return_value + selector = self.mock_selector.return_value selector.select.side_effect = [(None, None), KeyboardInterrupt()] sock.accept.side_effect = BlockingIOError() - self.pipe[0].send(socket.AF_INET6) self.worker.run() self.mock_protocol_handler.assert_not_called() - @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') @mock.patch('proxy.recv_handle') def test_accepts_client_from_server_socket( self, mock_recv_handle: mock.Mock, - mock_fromfd: mock.Mock, - mock_selector: mock.Mock) -> None: + mock_fromfd: mock.Mock) -> None: fileno = 10 conn = mock.MagicMock() addr = mock.MagicMock() @@ -386,10 +384,9 @@ def test_accepts_client_from_server_socket( self.mock_protocol_handler.return_value.start.side_effect = KeyboardInterrupt() - selector = mock_selector.return_value + selector = self.mock_selector.return_value selector.select.return_value = [(None, None)] - self.pipe[0].send(socket.AF_INET6) self.worker.run() selector.register.assert_called_with(sock, selectors.EVENT_READ) From 1f1d6dbd781e1e45731341577b26aa34847c0f3e Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 00:19:31 -0700 Subject: [PATCH 15/54] Make threadless work for a single client :) --- proxy.py | 75 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/proxy.py b/proxy.py index 90838b6e6c..eb8df73a06 100755 --- a/proxy.py +++ b/proxy.py @@ -805,7 +805,7 @@ def start_workers(self) -> None: self.work_klass, **self.kwargs ) - acceptor.daemon = True + # acceptor.daemon = True acceptor.start() self.acceptors.append(acceptor) self.work_queues.append(work_queue[0]) @@ -857,28 +857,49 @@ def __init__( # TODO: Create an abstract class that ProtocolHandler will implement. 
# For now hardcode type as ProtocolHandler self.clients: Dict[int, ProtocolHandler] = {} - self.selector = selectors.DefaultSelector() + self.selector: Optional[selectors.DefaultSelector] = None def run(self) -> None: + self.selector = selectors.DefaultSelector() try: + self.selector.register(self.client_queue, selectors.EVENT_READ) while True: - # TODO: Recv via selector to make non-blocking - addr = self.client_queue.recv() - fileno = recv_handle(self.client_queue) + events: Dict[socket.socket, int] = {} + for work in self.clients.values(): + events.update(work.get_events()) - work = self.work_klass( - fileno=fileno, - addr=addr, - **self.kwargs) - self.clients[fileno] = work - - events: Dict[socket.socket, int] = work.get_events() for fd in events: self.selector.register(fd, events[fd]) - _ = self.selector.select(timeout=1) + ev = self.selector.select(timeout=1) + if len(ev) > 0: + readables = [] + writables = [] + for key, mask in ev: + if mask & selectors.EVENT_READ: + readables.append(key.fileobj) + if mask & selectors.EVENT_WRITE: + writables.append(key.fileobj) + + # Receive accepted client connection + if self.client_queue in readables: + readables.remove(self.client_queue) + addr = self.client_queue.recv() + fileno = recv_handle(self.client_queue) + self.clients[fileno] = self.work_klass( + fileno=fileno, + addr=addr, + **self.kwargs) + self.clients[fileno].initialize() + + if len(self.clients.keys()) > 0 and \ + (len(readables) > 0 or len(writables) > 0): + work = self.clients[list(self.clients.keys())[0]] + teardown = work.handle_events(readables, writables) + if teardown: + work.shutdown() + del self.clients[list(self.clients.keys())[0]] - work.handle_selected_events(events) for fd in events.keys(): self.selector.unregister(fd) except KeyboardInterrupt: @@ -898,20 +919,20 @@ class Acceptor(multiprocessing.Process): def __init__( self, - family: int, + family: socket.AddressFamily, threadless: bool, work_queue: connection.Connection, work_klass: type, **kwargs: Any) -> None: super().__init__() - self.family: int = family + self.family: socket.AddressFamily = family self.threadless: bool = threadless self.work_queue: connection.Connection = work_queue self.work_klass = work_klass self.kwargs = kwargs self.running = False - self.selector = selectors.DefaultSelector() + self.selector: Optional[selectors.DefaultSelector] = None self.sock: Optional[socket.socket] = None self.threadless_process: Optional[multiprocessing.Process] = None self.threadless_client_queue: Optional[connection.Connection] = None @@ -924,7 +945,7 @@ def start_threadless_process(self) -> None: self.threadless_process = Threadless( pipe[1], self.work_klass, **self.kwargs ) - self.threadless_process.daemon = True + # self.threadless_process.daemon = True self.threadless_process.start() def shutdown_threadless_process(self) -> None: @@ -934,16 +955,16 @@ def shutdown_threadless_process(self) -> None: self.threadless_process.join() self.threadless_client_queue.close() - def run_once(self) -> bool: + def run_once(self) -> None: with self.lock: events = self.selector.select(timeout=1) if len(events) == 0: - return False + return try: assert self.sock conn, addr = self.sock.accept() except BlockingIOError: - return False + return if self.threadless and \ self.threadless_client_queue and \ self.threadless_process: @@ -963,24 +984,22 @@ def run_once(self) -> bool: fileno=conn.fileno(), addr=addr, **self.kwargs) - work.setDaemon(True) + # work.setDaemon(True) work.start() - return False def run(self) -> None: 
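+        # Rebuild the listening socket from the handle received over the pipe,
+        # then accept client connections until shutdown.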
self.running = True + self.selector = selectors.DefaultSelector() self.sock = socket.fromfd( recv_handle(self.work_queue), family=self.family, type=socket.SOCK_STREAM ) try: - self.start_threadless_process() self.selector.register(self.sock, selectors.EVENT_READ) + self.start_threadless_process() while self.running: - teardown = self.run_once() - if teardown: - break + self.run_once() except KeyboardInterrupt: pass finally: @@ -1869,7 +1888,7 @@ def start_dispatcher(self) -> None: args=(self.event_dispatcher_shutdown, self.config.devtools_event_queue, self.client)) - self.event_dispatcher_thread.setDaemon(True) + # self.event_dispatcher_thread.setDaemon(True) self.event_dispatcher_thread.start() def stop_dispatcher(self) -> None: From 2d0228909c4a0130a6f0baded52173a43549db93 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 01:35:51 -0700 Subject: [PATCH 16/54] Threadless is soon be our default --- proxy.py | 68 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/proxy.py b/proxy.py index eb8df73a06..b719da3439 100755 --- a/proxy.py +++ b/proxy.py @@ -872,33 +872,41 @@ def run(self) -> None: self.selector.register(fd, events[fd]) ev = self.selector.select(timeout=1) - if len(ev) > 0: - readables = [] - writables = [] - for key, mask in ev: - if mask & selectors.EVENT_READ: - readables.append(key.fileobj) - if mask & selectors.EVENT_WRITE: - writables.append(key.fileobj) - - # Receive accepted client connection - if self.client_queue in readables: - readables.remove(self.client_queue) - addr = self.client_queue.recv() - fileno = recv_handle(self.client_queue) - self.clients[fileno] = self.work_klass( - fileno=fileno, - addr=addr, - **self.kwargs) - self.clients[fileno].initialize() - - if len(self.clients.keys()) > 0 and \ - (len(readables) > 0 or len(writables) > 0): - work = self.clients[list(self.clients.keys())[0]] - teardown = work.handle_events(readables, writables) - if teardown: - work.shutdown() - del self.clients[list(self.clients.keys())[0]] + if len(ev) == 0: + for fd in events.keys(): + self.selector.unregister(fd) + continue + + readables = [] + writables = [] + for key, mask in ev: + if mask & selectors.EVENT_READ: + readables.append(key.fileobj) + if mask & selectors.EVENT_WRITE: + writables.append(key.fileobj) + + # Receive accepted client connection + if self.client_queue in readables: + readables.remove(self.client_queue) + addr = self.client_queue.recv() + fileno = recv_handle(self.client_queue) + self.clients[fileno] = self.work_klass( + fileno=fileno, + addr=addr, + **self.kwargs) + self.clients[fileno].initialize() + + # TODO: Only send readable / writables that client requested for + # Downside is that for thousands of connections, we'll be passing + # large lists back and forth for no actions. 
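+                    # Each ProtocolHandler acts only on descriptors it owns:
+                    # handle_events() checks membership in readables/writables
+                    # before reading or flushing, so events meant for other
+                    # clients are ignored.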
+ shutdown_fileno: List[int] = [] + for fileno in self.clients: + teardown = self.clients[fileno].handle_events(readables, writables) + if teardown: + self.clients[fileno].shutdown() + shutdown_fileno.append(fileno) + for fileno in shutdown_fileno: + del self.clients[fileno] for fd in events.keys(): self.selector.unregister(fd) @@ -956,6 +964,7 @@ def shutdown_threadless_process(self) -> None: self.threadless_client_queue.close() def run_once(self) -> None: + assert self.selector with self.lock: events = self.selector.select(timeout=1) if len(events) == 0: @@ -2191,11 +2200,12 @@ def on_client_connection_close(self) -> None: def access_log(self) -> None: logger.info( - '%s:%s - %s %s' % + '%s:%s - %s %s - %.2f ms' % (self.client.addr[0], self.client.addr[1], text_(self.request.method), - text_(self.request.path))) + text_(self.request.path), + (time.time() - self.start_time) * 1000)) def get_descriptors( self) -> Tuple[List[socket.socket], List[socket.socket]]: From b5532de27b7041fc99a267a5bfcbf803150064e7 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 02:22:10 -0700 Subject: [PATCH 17/54] Close client queue --- proxy.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/proxy.py b/proxy.py index b719da3439..41e0240287 100755 --- a/proxy.py +++ b/proxy.py @@ -860,9 +860,10 @@ def __init__( self.selector: Optional[selectors.DefaultSelector] = None def run(self) -> None: - self.selector = selectors.DefaultSelector() try: + self.selector = selectors.DefaultSelector() self.selector.register(self.client_queue, selectors.EVENT_READ) + while True: events: Dict[socket.socket, int] = {} for work in self.clients.values(): @@ -899,13 +900,13 @@ def run(self) -> None: # TODO: Only send readable / writables that client requested for # Downside is that for thousands of connections, we'll be passing # large lists back and forth for no actions. 
- shutdown_fileno: List[int] = [] + teared_down = [] for fileno in self.clients: teardown = self.clients[fileno].handle_events(readables, writables) if teardown: - self.clients[fileno].shutdown() - shutdown_fileno.append(fileno) - for fileno in shutdown_fileno: + teared_down.append(fileno) + for fileno in teared_down: + self.clients[fileno].shutdown() del self.clients[fileno] for fd in events.keys(): @@ -913,7 +914,7 @@ def run(self) -> None: except KeyboardInterrupt: pass finally: - pass + self.client_queue.close() class Acceptor(multiprocessing.Process): From 9a2739672c3aafdefbb05446d5848d90f7442b73 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 10:58:36 -0700 Subject: [PATCH 18/54] Use context manager for register/unregister --- proxy.py | 95 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 49 insertions(+), 46 deletions(-) diff --git a/proxy.py b/proxy.py index 41e0240287..09f815d995 100755 --- a/proxy.py +++ b/proxy.py @@ -859,58 +859,56 @@ def __init__( self.clients: Dict[int, ProtocolHandler] = {} self.selector: Optional[selectors.DefaultSelector] = None + @contextlib.contextmanager + def selected_events(self) -> Generator[Tuple[List[Union[int, _HasFileno]], + List[Union[int, _HasFileno]]], + None, None]: + events: Dict[socket.socket, int] = {} + for work in self.clients.values(): + events.update(work.get_events()) + for fd in events: + self.selector.register(fd, events[fd]) + ev = self.selector.select(timeout=1) + readables = [] + writables = [] + for key, mask in ev: + if mask & selectors.EVENT_READ: + readables.append(key.fileobj) + if mask & selectors.EVENT_WRITE: + writables.append(key.fileobj) + yield (readables, writables) + for fd in events.keys(): + self.selector.unregister(fd) + def run(self) -> None: try: self.selector = selectors.DefaultSelector() self.selector.register(self.client_queue, selectors.EVENT_READ) while True: - events: Dict[socket.socket, int] = {} - for work in self.clients.values(): - events.update(work.get_events()) - - for fd in events: - self.selector.register(fd, events[fd]) - - ev = self.selector.select(timeout=1) - if len(ev) == 0: - for fd in events.keys(): - self.selector.unregister(fd) - continue - - readables = [] - writables = [] - for key, mask in ev: - if mask & selectors.EVENT_READ: - readables.append(key.fileobj) - if mask & selectors.EVENT_WRITE: - writables.append(key.fileobj) - - # Receive accepted client connection - if self.client_queue in readables: - readables.remove(self.client_queue) - addr = self.client_queue.recv() - fileno = recv_handle(self.client_queue) - self.clients[fileno] = self.work_klass( - fileno=fileno, - addr=addr, - **self.kwargs) - self.clients[fileno].initialize() - - # TODO: Only send readable / writables that client requested for - # Downside is that for thousands of connections, we'll be passing - # large lists back and forth for no actions. - teared_down = [] - for fileno in self.clients: - teardown = self.clients[fileno].handle_events(readables, writables) - if teardown: - teared_down.append(fileno) - for fileno in teared_down: - self.clients[fileno].shutdown() - del self.clients[fileno] - - for fd in events.keys(): - self.selector.unregister(fd) + with self.selected_events() as (readables, writables): + if len(readables) == 0 and len(writables) == 0: + continue + + # TODO: Only send readable / writables that client is expecting. 
+ teared_down = [] + for fileno in self.clients: + teardown = self.clients[fileno].handle_events(readables, writables) + if teardown: + teared_down.append(fileno) + for fileno in teared_down: + self.clients[fileno].shutdown() + del self.clients[fileno] + + # Receive accepted client connection + if self.client_queue in readables: + addr = self.client_queue.recv() + fileno = recv_handle(self.client_queue) + self.clients[fileno] = self.work_klass( + fileno=fileno, + addr=addr, + **self.kwargs) + self.clients[fileno].initialize() except KeyboardInterrupt: pass finally: @@ -2082,6 +2080,11 @@ def __init__( self.routes[protocol][path] = instance def serve_file_or_404(self, path: str) -> bool: + """Read and serves a file from disk. + + Queues 404 Not Found for IOError. + Shouldn't this be server error? + """ try: with open(path, 'rb') as f: content = f.read() From 4688ad9f4e70e3c00f8407e2864e01b57a5c8abb Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 11:08:21 -0700 Subject: [PATCH 19/54] Fix Acceptor tests broken after refactoring --- tests.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tests.py b/tests.py index 9f42c5267b..ac76487ada 100644 --- a/tests.py +++ b/tests.py @@ -310,14 +310,11 @@ def test_setup_and_shutdown( class TestWorker(unittest.TestCase): - @mock.patch('selectors.DefaultSelector') @mock.patch('proxy.ProtocolHandler') def setUp( self, - mock_protocol_handler: mock.Mock, - mock_selector: mock.Mock) -> None: + mock_protocol_handler: mock.Mock) -> None: self.mock_protocol_handler = mock_protocol_handler - self.mock_selector = mock_selector self.pipe = multiprocessing.Pipe() self.protocol_config = proxy.ProtocolConfig() self.worker = proxy.Acceptor( @@ -327,12 +324,14 @@ def setUp( mock_protocol_handler, config=self.protocol_config) + @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') @mock.patch('proxy.recv_handle') def test_continues_when_no_events( self, mock_recv_handle: mock.Mock, - mock_fromfd: mock.Mock) -> None: + mock_fromfd: mock.Mock, + mock_selector: mock.Mock) -> None: fileno = 10 conn = mock.MagicMock() addr = mock.MagicMock() @@ -340,7 +339,7 @@ def test_continues_when_no_events( mock_fromfd.return_value.accept.return_value = (conn, addr) mock_recv_handle.return_value = fileno - selector = self.mock_selector.return_value + selector = mock_selector.return_value selector.select.side_effect = [[], KeyboardInterrupt()] self.worker.run() @@ -348,12 +347,14 @@ def test_continues_when_no_events( sock.accept.assert_not_called() self.mock_protocol_handler.assert_not_called() + @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') @mock.patch('proxy.recv_handle') def test_worker_doesnt_teardown_on_blocking_io_error( self, mock_recv_handle: mock.Mock, - mock_fromfd: mock.Mock) -> None: + mock_fromfd: mock.Mock, + mock_selector: mock.Mock) -> None: fileno = 10 conn = mock.MagicMock() addr = mock.MagicMock() @@ -361,7 +362,7 @@ def test_worker_doesnt_teardown_on_blocking_io_error( mock_fromfd.return_value.accept.return_value = (conn, addr) mock_recv_handle.return_value = fileno - selector = self.mock_selector.return_value + selector = mock_selector.return_value selector.select.side_effect = [(None, None), KeyboardInterrupt()] sock.accept.side_effect = BlockingIOError() @@ -369,12 +370,14 @@ def test_worker_doesnt_teardown_on_blocking_io_error( self.mock_protocol_handler.assert_not_called() + @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') 
@mock.patch('proxy.recv_handle')
     def test_accepts_client_from_server_socket(
             self,
             mock_recv_handle: mock.Mock,
-            mock_fromfd: mock.Mock) -> None:
+            mock_fromfd: mock.Mock,
+            mock_selector: mock.Mock) -> None:
         fileno = 10
         conn = mock.MagicMock()
         addr = mock.MagicMock()
@@ -384,7 +387,7 @@ def test_accepts_client_from_server_socket(

         self.mock_protocol_handler.return_value.start.side_effect = KeyboardInterrupt()

-        selector = self.mock_selector.return_value
+        selector = mock_selector.return_value
         selector.select.return_value = [(None, None)]

         self.worker.run()
@@ -402,7 +405,7 @@ def test_accepts_client_from_server_socket(
             addr=addr,
             **{'config': self.protocol_config}
         )
-        self.mock_protocol_handler.return_value.setDaemon.assert_called()
+        # self.mock_protocol_handler.return_value.setDaemon.assert_called()
         self.mock_protocol_handler.return_value.start.assert_called()
         sock.close.assert_called()

From 6af4e613b59f13f6b477c7fe6a5f87f6d9ae0949 Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Mon, 14 Oct 2019 11:37:04 -0700
Subject: [PATCH 20/54] Use asyncio tasks to invoke ProtocolHandler.handle_events

This gives all client threads a chance to respond without waiting for other
handlers to return.
---
 proxy.py | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/proxy.py b/proxy.py
index 09f815d995..9162913da7 100755
--- a/proxy.py
+++ b/proxy.py
@@ -9,6 +9,7 @@
     :license: BSD, see LICENSE for more details.
 """
 import argparse
+import asyncio
 import base64
 import contextlib
 import errno
@@ -866,6 +867,7 @@ def selected_events(self) -> Generator[Tuple[List[Union[int, _HasFileno]],
         events: Dict[socket.socket, int] = {}
         for work in self.clients.values():
             events.update(work.get_events())
+        assert self.selector is not None
         for fd in events:
             self.selector.register(fd, events[fd])
         ev = self.selector.select(timeout=1)
@@ -880,26 +882,34 @@ def selected_events(self) -> Generator[Tuple[List[Union[int, _HasFileno]],
         for fd in events.keys():
             self.selector.unregister(fd)

+    async def handle_events(
+            self, fileno: int,
+            readables: List[Union[int, _HasFileno]],
+            writables: List[Union[int, _HasFileno]]) -> bool:
+        return self.clients[fileno].handle_events(readables, writables)
+
+    # TODO: Use correct future typing annotations
+    async def wait_for_tasks(
+            self, tasks: Dict[int, Any]) -> None:
+        for fileno in tasks:
+            teardown = await tasks[fileno]
+            if teardown:
+                self.clients[fileno].shutdown()
+                del self.clients[fileno]
+
     def run(self) -> None:
         try:
             self.selector = selectors.DefaultSelector()
             self.selector.register(self.client_queue, selectors.EVENT_READ)
-
             while True:
                 with self.selected_events() as (readables, writables):
                     if len(readables) == 0 and len(writables) == 0:
                         continue
-
                     # TODO: Only send readable / writables that client is expecting.
- teared_down = [] + tasks = {} for fileno in self.clients: - teardown = self.clients[fileno].handle_events(readables, writables) - if teardown: - teared_down.append(fileno) - for fileno in teared_down: - self.clients[fileno].shutdown() - del self.clients[fileno] - + tasks[fileno] = asyncio.ensure_future( + self.handle_events(fileno, readables, writables)) # Receive accepted client connection if self.client_queue in readables: addr = self.client_queue.recv() @@ -909,6 +919,7 @@ def run(self) -> None: addr=addr, **self.kwargs) self.clients[fileno].initialize() + asyncio.run(self.wait_for_tasks(tasks)) except KeyboardInterrupt: pass finally: From 9027fd8ac41fa4e3a107bc38a2c4daca48be289d Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 12:13:03 -0700 Subject: [PATCH 21/54] Explicitly initialize event loop per Threadless process --- proxy.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/proxy.py b/proxy.py index 9162913da7..27ab23fe40 100755 --- a/proxy.py +++ b/proxy.py @@ -859,6 +859,7 @@ def __init__( # For now hardcode type as ProtocolHandler self.clients: Dict[int, ProtocolHandler] = {} self.selector: Optional[selectors.DefaultSelector] = None + self.loop: Optional[asyncio.AbstractEventLoop] = None @contextlib.contextmanager def selected_events(self) -> Generator[Tuple[List[Union[int, _HasFileno]], @@ -901,6 +902,7 @@ def run(self) -> None: try: self.selector = selectors.DefaultSelector() self.selector.register(self.client_queue, selectors.EVENT_READ) + self.loop = asyncio.get_event_loop() while True: with self.selected_events() as (readables, writables): if len(readables) == 0 and len(writables) == 0: @@ -908,7 +910,7 @@ def run(self) -> None: # TODO: Only send readable / writables that client is expecting. tasks = {} for fileno in self.clients: - tasks[fileno] = asyncio.ensure_future( + tasks[fileno] = self.loop.create_task( self.handle_events(fileno, readables, writables)) # Receive accepted client connection if self.client_queue in readables: @@ -919,11 +921,13 @@ def run(self) -> None: addr=addr, **self.kwargs) self.clients[fileno].initialize() - asyncio.run(self.wait_for_tasks(tasks)) + self.loop.run_until_complete(self.wait_for_tasks(tasks)) except KeyboardInterrupt: pass finally: + self.selector.unregister(self.client_queue) self.client_queue.close() + self.loop.close() class Acceptor(multiprocessing.Process): From 5b05141da0d522e7ff6283022b8cf0827a9bd9fd Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 12:20:43 -0700 Subject: [PATCH 22/54] Mypy fixes --- proxy.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/proxy.py b/proxy.py index 27ab23fe40..56292411c0 100755 --- a/proxy.py +++ b/proxy.py @@ -895,6 +895,9 @@ async def wait_for_tasks( for fileno in tasks: teardown = await tasks[fileno] if teardown: + # TODO: Shutdown can block too + # Currently calls flush which can use + # ProtocolHandler.selector self.clients[fileno].shutdown() del self.clients[fileno] @@ -907,7 +910,7 @@ def run(self) -> None: with self.selected_events() as (readables, writables): if len(readables) == 0 and len(writables) == 0: continue - # TODO: Only send readable / writables that client is expecting. + # TODO: Only send readable / writables that client originally registered. 
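# -- Editor's note: a standalone sketch (not part of the patch) of the
# scheduling shape patches 20-21 converge on: one task per work item on an
# explicit per-process event loop, drained with run_until_complete(). The
# handle() coroutine below is a stand-in for ProtocolHandler.handle_events():
import asyncio
from typing import Dict, List

async def handle(work_id: int) -> bool:
    # Pretend even-numbered work items request teardown.
    await asyncio.sleep(0)
    return work_id % 2 == 0

def dispatch(work_ids: List[int]) -> List[int]:
    loop = asyncio.new_event_loop()
    try:
        # Tasks created on a not-yet-running loop start once the loop runs.
        tasks: Dict[int, "asyncio.Task[bool]"] = {
            wid: loop.create_task(handle(wid)) for wid in work_ids}

        async def drain() -> List[int]:
            # Await every task; collect ids whose handler requested teardown.
            return [wid for wid, task in tasks.items() if await task]

        return loop.run_until_complete(drain())
    finally:
        loop.close()

# dispatch([1, 2, 3]) returns [2]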
                    tasks = {}
                     for fileno in self.clients:
                         tasks[fileno] = self.loop.create_task(
@@ -925,8 +928,10 @@ def run(self) -> None:
         except KeyboardInterrupt:
             pass
         finally:
+            assert self.selector is not None
             self.selector.unregister(self.client_queue)
             self.client_queue.close()
+            assert self.loop is not None
             self.loop.close()

From fdc673c910da8e521181b7111e6026b669dcaedb Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Mon, 14 Oct 2019 12:59:41 -0700
Subject: [PATCH 23/54] Add ThreadlessWork abstract class implemented by
 ProtocolHandler

---
 proxy.py | 227 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 125 insertions(+), 102 deletions(-)

diff --git a/proxy.py b/proxy.py
index 56292411c0..cf9a30c0f1 100755
--- a/proxy.py
+++ b/proxy.py
@@ -836,13 +836,38 @@ def setup(self) -> None:
         self.socket.close()


+class ThreadlessWork(ABC):
+    """Implement ThreadlessWork to hook into the event loop provided by Threadless process."""
+
+    @abstractmethod
+    def initialize(self) -> None:
+        pass    # pragma: no cover
+
+    @abstractmethod
+    def get_events(self) -> Dict[socket.socket, int]:
+        return {}   # pragma: no cover
+
+    @abstractmethod
+    def handle_events(self,
+                      readables: List[Union[int, _HasFileno]],
+                      writables: List[Union[int, _HasFileno]]) -> bool:
+        return False    # pragma: no cover
+
+    @abstractmethod
+    def shutdown(self) -> None:
+        pass    # pragma: no cover
+
+
 class Threadless(multiprocessing.Process):
-    """Threadless handles lifecycle of multiple ProtocolHandler instances.
+    """Threadless provides an event loop. Use it by implementing ThreadlessWork class.

     When --threadless option is enabled, each Acceptor process also
-    spawns one Threadless process. Then instead of spawning a new thread
-    for each accepted client connection, Acceptor process simply sends
+    spawns one Threadless process. And instead of spawning a new thread
+    for each accepted client connection, Acceptor process sends
     accepted client connection to Threadless process over a pipe.
+
+    ProtocolHandler implements ThreadlessWork class and hooks into the
+    event loop provided by Threadless.
     """

     def __init__(
@@ -855,9 +880,7 @@ def __init__(
         self.work_klass = work_klass
         self.kwargs = kwargs
- # For now hardcode type as ProtocolHandler - self.clients: Dict[int, ProtocolHandler] = {} + self.works: Dict[int, ThreadlessWork] = {} self.selector: Optional[selectors.DefaultSelector] = None self.loop: Optional[asyncio.AbstractEventLoop] = None @@ -866,7 +889,7 @@ def selected_events(self) -> Generator[Tuple[List[Union[int, _HasFileno]], List[Union[int, _HasFileno]]], None, None]: events: Dict[socket.socket, int] = {} - for work in self.clients.values(): + for work in self.works.values(): events.update(work.get_events()) assert self.selector is not None for fd in events: @@ -887,7 +910,7 @@ async def handle_events( self, fileno: int, readables: List[Union[int, _HasFileno]], writables: List[Union[int, _HasFileno]]) -> bool: - return self.clients[fileno].handle_events(readables, writables) + return self.works[fileno].handle_events(readables, writables) # TODO: Use correct future typing annotations async def wait_for_tasks( @@ -898,8 +921,8 @@ async def wait_for_tasks( # TODO: Shutdown can block too # Currently calls flush which can use # ProtocolHandler.selector - self.clients[fileno].shutdown() - del self.clients[fileno] + self.works[fileno].shutdown() + del self.works[fileno] def run(self) -> None: try: @@ -912,18 +935,18 @@ def run(self) -> None: continue # TODO: Only send readable / writables that client originally registered. tasks = {} - for fileno in self.clients: + for fileno in self.works: tasks[fileno] = self.loop.create_task( self.handle_events(fileno, readables, writables)) # Receive accepted client connection if self.client_queue in readables: addr = self.client_queue.recv() fileno = recv_handle(self.client_queue) - self.clients[fileno] = self.work_klass( + self.works[fileno] = self.work_klass( fileno=fileno, addr=addr, **self.kwargs) - self.clients[fileno].initialize() + self.works[fileno].initialize() self.loop.run_until_complete(self.wait_for_tasks(tasks)) except KeyboardInterrupt: pass @@ -2236,7 +2259,7 @@ def get_descriptors( return [], [] -class ProtocolHandler(threading.Thread): +class ProtocolHandler(threading.Thread, ThreadlessWork): """HTTP, HTTPS, HTTP2, WebSockets protocol handler. Accepts `Client` connection object and manages ProtocolHandlerPlugin invocations. 
@@ -2271,6 +2294,94 @@ def initialize(self) -> None: for klass in self.config.plugins[b'ProtocolHandlerPlugin']: instance = klass(self.config, self.client, self.request) self.plugins[instance.name()] = instance + logger.debug('Handling connection %r' % self.client.connection) + + def get_events(self) -> Dict[socket.socket, int]: + events: Dict[socket.socket, int] = { + self.client.connection: selectors.EVENT_READ + } + if self.client.has_buffer(): + events[self.client.connection] |= selectors.EVENT_WRITE + + # ProtocolHandlerPlugin.get_descriptors + for plugin in self.plugins.values(): + plugin_read_desc, plugin_write_desc = plugin.get_descriptors() + for r in plugin_read_desc: + if r not in events: + events[r] = selectors.EVENT_READ + else: + events[r] |= selectors.EVENT_READ + for w in plugin_write_desc: + if w not in events: + events[w] = selectors.EVENT_WRITE + else: + events[w] |= selectors.EVENT_WRITE + + return events + + def handle_events(self, readables: List[Union[int, _HasFileno]], writables: List[Union[int, _HasFileno]]) -> bool: + """Returns True if proxy must teardown.""" + # Flush buffer for ready to write sockets + teardown = self.handle_writables(writables) + if teardown: + return True + + # Invoke plugin.write_to_descriptors + for plugin in self.plugins.values(): + teardown = plugin.write_to_descriptors(writables) + if teardown: + return True + + # Read from ready to read sockets + teardown = self.handle_readables(readables) + if teardown: + return True + + # Invoke plugin.read_from_descriptors + for plugin in self.plugins.values(): + teardown = plugin.read_from_descriptors(readables) + if teardown: + return True + + # Teardown if client buffer is empty and connection is inactive + if not self.client.has_buffer() and \ + self.is_connection_inactive(): + self.client.queue(build_http_response( + httpStatusCodes.REQUEST_TIMEOUT, reason=b'Request Timeout', + headers={ + b'Server': PROXY_AGENT_HEADER_VALUE, + b'Connection': b'close', + } + )) + logger.debug( + 'Client buffer is empty and maximum inactivity has reached ' + 'between client and server connection, tearing down...') + return True + + return False + + def shutdown(self) -> None: + # Flush pending buffer if any + self.flush() + + # Invoke plugin.on_client_connection_close + for plugin in self.plugins.values(): + plugin.on_client_connection_close() + + logger.debug( + 'Closing proxy for connection %r ' + 'at address %r with pending client buffer size %d bytes' % + (self.client.connection, self.client.addr, self.client.buffer_size())) + + if not self.client.closed: + try: + self.client.connection.shutdown(socket.SHUT_WR) + logger.debug('Client connection shutdown successful') + except OSError: + pass + finally: + self.client.connection.close() + logger.debug('Client connection closed') def fromfd(self, fileno: int) -> socket.socket: return socket.fromfd( @@ -2389,70 +2500,6 @@ def handle_readables(self, readables: List[Union[int, _HasFileno]]) -> bool: return True return False - def handle_events(self, readables: List[Union[int, _HasFileno]], writables: List[Union[int, _HasFileno]]) -> bool: - """Returns True if proxy must teardown.""" - # Flush buffer for ready to write sockets - teardown = self.handle_writables(writables) - if teardown: - return True - - # Invoke plugin.write_to_descriptors - for plugin in self.plugins.values(): - teardown = plugin.write_to_descriptors(writables) - if teardown: - return True - - # Read from ready to read sockets - teardown = self.handle_readables(readables) - if teardown: - return 
True - - # Invoke plugin.read_from_descriptors - for plugin in self.plugins.values(): - teardown = plugin.read_from_descriptors(readables) - if teardown: - return True - - # Teardown if client buffer is empty and connection is inactive - if not self.client.has_buffer() and \ - self.is_connection_inactive(): - self.client.queue(build_http_response( - httpStatusCodes.REQUEST_TIMEOUT, reason=b'Request Timeout', - headers={ - b'Server': PROXY_AGENT_HEADER_VALUE, - b'Connection': b'close', - } - )) - logger.debug( - 'Client buffer is empty and maximum inactivity has reached ' - 'between client and server connection, tearing down...') - return True - - return False - - def get_events(self) -> Dict[socket.socket, int]: - events: Dict[socket.socket, int] = { - self.client.connection: selectors.EVENT_READ - } - if self.client.has_buffer(): - events[self.client.connection] |= selectors.EVENT_WRITE - - # ProtocolHandlerPlugin.get_descriptors - for plugin in self.plugins.values(): - plugin_read_desc, plugin_write_desc = plugin.get_descriptors() - for r in plugin_read_desc: - if r not in events: - events[r] = selectors.EVENT_READ - else: - events[r] |= selectors.EVENT_READ - for w in plugin_write_desc: - if w not in events: - events[w] = selectors.EVENT_WRITE - else: - events[w] |= selectors.EVENT_WRITE - - return events - @contextlib.contextmanager def selected_events(self) -> \ Generator[Tuple[List[Union[int, _HasFileno]], @@ -2483,7 +2530,6 @@ def run_once(self) -> bool: def run(self) -> None: try: self.initialize() - logger.debug('Handling connection %r' % self.client.connection) while True: teardown = self.run_once() if teardown: @@ -2499,29 +2545,6 @@ def run(self) -> None: finally: self.shutdown() - def shutdown(self) -> None: - # Flush pending buffer if any - self.flush() - - # Invoke plugin.on_client_connection_close - for plugin in self.plugins.values(): - plugin.on_client_connection_close() - - logger.debug( - 'Closing proxy for connection %r ' - 'at address %r with pending client buffer size %d bytes' % - (self.client.connection, self.client.addr, self.client.buffer_size())) - - if not self.client.closed: - try: - self.client.connection.shutdown(socket.SHUT_WR) - logger.debug('Client connection shutdown successful') - except OSError: - pass - finally: - self.client.connection.close() - logger.debug('Client connection closed') - class DevtoolsProtocolPlugin(ProtocolHandlerPlugin): """ From 856aed55863d0bef0baa5d14bd927b8fe94ca49d Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 16:56:15 -0700 Subject: [PATCH 24/54] Add benchmark.py Avoid TIME_WAIT by properly shutting down the connection. 
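Editor's note, a sketch that is not part of the original message: "properly
shutting down" here means a half-close followed by a drain, so both sides walk
through the full FIN handshake instead of leaving sockets stuck in CLOSE_WAIT
or FIN_WAIT_2, while TIME_WAIT lands on whichever endpoint closed first. For a
plain blocking TCP client socket, that teardown reduces to:

    import socket

    def close_gracefully(conn: socket.socket) -> None:
        conn.shutdown(socket.SHUT_WR)   # send FIN; peer sees EOF on recv()
        while conn.recv(4096):          # drain until the peer closes too
            pass
        conn.close()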
--- .gitignore | 1 - Makefile | 4 +- benchmark.py | 95 +++++++++++++++++++++++++++++++++++++++++++ monitor_open_files.sh | 20 +++++++++ proxy.py | 28 ++++++------- 5 files changed, 131 insertions(+), 17 deletions(-) create mode 100755 benchmark.py create mode 100755 monitor_open_files.sh diff --git a/.gitignore b/.gitignore index 9a8ba1c0db..eaabbb272a 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,3 @@ proxy.py.iml *.pyc ca-*.pem https-*.pem -benchmark.py diff --git a/Makefile b/Makefile index bb44f08a1d..981bc25b75 100644 --- a/Makefile +++ b/Makefile @@ -44,8 +44,8 @@ coverage: open htmlcov/index.html lint: - flake8 --ignore=W504 --max-line-length=127 proxy.py plugin_examples.py tests.py setup.py - mypy --strict --ignore-missing-imports proxy.py plugin_examples.py tests.py setup.py + flake8 --ignore=W504 --max-line-length=127 proxy.py plugin_examples.py tests.py setup.py benchmark.py + mypy --strict --ignore-missing-imports proxy.py plugin_examples.py tests.py setup.py benchmark.py autopep8: autopep8 --recursive --in-place --aggressive proxy.py diff --git a/benchmark.py b/benchmark.py new file mode 100755 index 0000000000..296658ce96 --- /dev/null +++ b/benchmark.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + proxy.py + ~~~~~~~~ + ⚡⚡⚡ Fast, Lightweight, Programmable Proxy Server in a single Python file. + + :copyright: (c) 2013-present by Abhinav Singh and contributors. + :license: BSD, see LICENSE for more details. +""" +import argparse +import asyncio +import sys +from typing import List, Tuple + +import proxy + +DEFAULT_N = 10 + + +def init_parser() -> argparse.ArgumentParser: + """Initializes and returns argument parser.""" + parser = argparse.ArgumentParser( + description='Benchmark opens N concurrent connections ' + 'to proxy.py web server. Currently, HTTP/1.1 ' + 'keep-alive connections are opened. Over each opened ' + 'connection multiple pipelined request / response ' + 'packets are exchanged with proxy.py web server.', + epilog='Proxy.py not working? Report at: %s/issues/new' % proxy.__homepage__ + ) + parser.add_argument( + '--n', '-n', + type=int, + default=DEFAULT_N, + help='Default: ' + str(DEFAULT_N) + '. See description above for meaning of N.' 
+ ) + return parser + + +class Benchmark: + + def __init__(self, n: int = DEFAULT_N) -> None: + self.n = n + self.clients: List[Tuple[asyncio.StreamReader, asyncio.StreamWriter]] = [] + + async def open_connections(self) -> None: + for _ in range(self.n): + self.clients.append(await asyncio.open_connection('::', 8899)) + print('Opened ' + str(self.n) + ' connections') + + def send_requests(self) -> None: + for _, writer in self.clients: + writer.write(proxy.build_http_request( + proxy.httpMethods.GET, b'/' + )) + + async def recv_responses(self) -> None: + for reader, _ in self.clients: + response = proxy.HttpParser(proxy.httpParserTypes.RESPONSE_PARSER) + while response.state != proxy.httpParserStates.COMPLETE: + response.parse(await reader.read(proxy.DEFAULT_BUFFER_SIZE)) + + async def close_connections(self) -> None: + for reader, writer in self.clients: + writer.close() + await writer.wait_closed() + print('Closed ' + str(self.n) + ' connections') + + async def run(self) -> None: + num_completed_requests_per_connection: int = 0 + try: + await self.open_connections() + print('Exchanging request / response packets') + while True: + self.send_requests() + await self.recv_responses() + num_completed_requests_per_connection += 1 + await asyncio.sleep(1) + finally: + await self.close_connections() + print('Exchanged ' + str(num_completed_requests_per_connection) + + ' request / response per connection') + + +def main(input_args: List[str]) -> None: + args = init_parser().parse_args(input_args) + benchmark = Benchmark(n=args.n) + try: + asyncio.run(benchmark.run()) + except KeyboardInterrupt: + pass + + +if __name__ == '__main__': + main(sys.argv[1:]) # pragma: no cover diff --git a/monitor_open_files.sh b/monitor_open_files.sh new file mode 100755 index 0000000000..95d9af3f80 --- /dev/null +++ b/monitor_open_files.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +PROXY_PY_PID=$1 +if [ -z "$PROXY_PY_PID" ]; then + echo "PROXY_PY_PID required as argument." 
+ exit 1 +fi + +OPEN_FILES_BY_MAIN=$(lsof -p "$PROXY_PY_PID" | wc -l) +echo "[$PROXY_PY_PID] Main process: $OPEN_FILES_BY_MAIN" + +pgrep -P "$PROXY_PY_PID" | while read -r acceptorPid; do + OPEN_FILES_BY_ACCEPTOR=$(lsof -p "$acceptorPid" | wc -l) + echo "[$acceptorPid] Acceptor process: $OPEN_FILES_BY_ACCEPTOR" + + pgrep -P "$acceptorPid" | while read -r threadlessPid; do + OPEN_FILES_BY_THREADLESS=$(lsof -p "$threadlessPid" | wc -l) + echo " [$threadlessPid] Threadless process: $OPEN_FILES_BY_THREADLESS" + done +done diff --git a/proxy.py b/proxy.py index cf9a30c0f1..0fced510d7 100755 --- a/proxy.py +++ b/proxy.py @@ -851,10 +851,12 @@ def get_events(self) -> Dict[socket.socket, int]: def handle_events(self, readables: List[Union[int, _HasFileno]], writables: List[Union[int, _HasFileno]]) -> bool: + """Return True to shutdown work.""" return False # pragma: no cover @abstractmethod def shutdown(self) -> None: + """Must close any opened resources.""" pass # pragma: no cover @@ -2357,7 +2359,6 @@ def handle_events(self, readables: List[Union[int, _HasFileno]], writables: List 'Client buffer is empty and maximum inactivity has reached ' 'between client and server connection, tearing down...') return True - return False def shutdown(self) -> None: @@ -2369,19 +2370,18 @@ def shutdown(self) -> None: plugin.on_client_connection_close() logger.debug( - 'Closing proxy for connection %r ' + 'Closing client connection %r ' 'at address %r with pending client buffer size %d bytes' % (self.client.connection, self.client.addr, self.client.buffer_size())) - if not self.client.closed: - try: - self.client.connection.shutdown(socket.SHUT_WR) - logger.debug('Client connection shutdown successful') - except OSError: - pass - finally: - self.client.connection.close() - logger.debug('Client connection closed') + try: + self.client.connection.shutdown(socket.SHUT_WR) + logger.debug('Client connection shutdown successful') + except OSError: + pass + finally: + self.client.connection.close() + logger.debug('Client connection closed') def fromfd(self, fileno: int) -> socket.socket: return socket.fromfd( @@ -2620,7 +2620,7 @@ def on_client_connection_close(self) -> None: pass def request_will_be_sent(self) -> Dict[str, Any]: - _now = time.time() + now = time.time() return { 'requestId': self.id, 'loaderId': self.loader_id, @@ -2640,8 +2640,8 @@ def request_will_be_sent(self) -> Dict[str, Any]: 'postData': None if self.request.method != 'POST' else text_(self.request.body) }, - 'timestamp': _now - PROXY_PY_START_TIME, - 'wallTime': _now, + 'timestamp': now - PROXY_PY_START_TIME, + 'wallTime': now, 'initiator': { 'type': 'other' }, From ebc62ff950991547e80a2c080d39b5ae63479f95 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 16:57:53 -0700 Subject: [PATCH 25/54] Add benchmark.py as part of testing workflow --- .github/workflows/testing.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 7815f0afc8..11cca3215a 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -27,8 +27,8 @@ jobs: run: | # The GitHub editor is 127 chars wide # W504 screams for line break after binary operators - flake8 --ignore=W504 --max-line-length=127 proxy.py plugin_examples.py tests.py setup.py + flake8 --ignore=W504 --max-line-length=127 proxy.py plugin_examples.py tests.py setup.py benchmark.py # mypy compliance check - mypy --strict --ignore-missing-imports proxy.py plugin_examples.py tests.py setup.py + 
mypy --strict --ignore-missing-imports proxy.py plugin_examples.py tests.py setup.py benchmark.py - name: Run Tests run: pytest tests.py From aa4410d71730dc04e92d6b77c8014f6b697d70ea Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 17:35:38 -0700 Subject: [PATCH 26/54] When e2e encryption is enabled, unwrap socket before shutdown to ensure CLOSED state --- proxy.py | 56 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/proxy.py b/proxy.py index 0fced510d7..28239a3806 100755 --- a/proxy.py +++ b/proxy.py @@ -926,30 +926,38 @@ async def wait_for_tasks( self.works[fileno].shutdown() del self.works[fileno] + def run_once(self) -> None: + with self.selected_events() as (readables, writables): + if len(readables) == 0 and len(writables) == 0: + return + # TODO: Only send readable / writables that client originally registered. + tasks = {} + for fileno in self.works: + tasks[fileno] = self.loop.create_task( + self.handle_events(fileno, readables, writables)) + # Receive accepted client connection + if self.client_queue in readables: + addr = self.client_queue.recv() + fileno = recv_handle(self.client_queue) + self.works[fileno] = self.work_klass( + fileno=fileno, + addr=addr, + **self.kwargs) + try: + self.works[fileno].initialize() + except ssl.SSLError as e: + logger.exception('ssl.SSLError', exc_info=e) + self.works[fileno].shutdown() + del self.works[fileno] + self.loop.run_until_complete(self.wait_for_tasks(tasks)) + def run(self) -> None: try: self.selector = selectors.DefaultSelector() self.selector.register(self.client_queue, selectors.EVENT_READ) self.loop = asyncio.get_event_loop() while True: - with self.selected_events() as (readables, writables): - if len(readables) == 0 and len(writables) == 0: - continue - # TODO: Only send readable / writables that client originally registered. 
- tasks = {} - for fileno in self.works: - tasks[fileno] = self.loop.create_task( - self.handle_events(fileno, readables, writables)) - # Receive accepted client connection - if self.client_queue in readables: - addr = self.client_queue.recv() - fileno = recv_handle(self.client_queue) - self.works[fileno] = self.work_klass( - fileno=fileno, - addr=addr, - **self.kwargs) - self.works[fileno].initialize() - self.loop.run_until_complete(self.wait_for_tasks(tasks)) + self.run_once() except KeyboardInterrupt: pass finally: @@ -2067,8 +2075,10 @@ def cache_pac_file_response(self) -> None: def routes(self) -> List[Tuple[int, bytes]]: if self.config.pac_file_url_path: - return [(httpProtocolTypes.HTTP, bytes_( - self.config.pac_file_url_path))] + return [ + (httpProtocolTypes.HTTP, bytes_(self.config.pac_file_url_path)), + (httpProtocolTypes.HTTPS, bytes_(self.config.pac_file_url_path)), + ] return [] # pragma: no cover def handle_request(self, request: HttpParser) -> None: @@ -2374,13 +2384,15 @@ def shutdown(self) -> None: 'at address %r with pending client buffer size %d bytes' % (self.client.connection, self.client.addr, self.client.buffer_size())) + if self.config.encryption_enabled(): + conn = self.client.connection.unwrap() try: - self.client.connection.shutdown(socket.SHUT_WR) + conn.shutdown(socket.SHUT_RDWR) logger.debug('Client connection shutdown successful') except OSError: pass finally: - self.client.connection.close() + conn.close() logger.debug('Client connection closed') def fromfd(self, fileno: int) -> socket.socket: From 095234e3fe59c422478e8ad36a9e39148faf100e Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 17:37:36 -0700 Subject: [PATCH 27/54] MyPy fixes, Union should have worked, but likely unwrap is not part of socket.socket hence --- proxy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/proxy.py b/proxy.py index 28239a3806..8ee6033c71 100755 --- a/proxy.py +++ b/proxy.py @@ -927,6 +927,7 @@ async def wait_for_tasks( del self.works[fileno] def run_once(self) -> None: + assert self.loop is not None with self.selected_events() as (readables, writables): if len(readables) == 0 and len(writables) == 0: return @@ -2385,7 +2386,7 @@ def shutdown(self) -> None: (self.client.connection, self.client.addr, self.client.buffer_size())) if self.config.encryption_enabled(): - conn = self.client.connection.unwrap() + conn = cast(ssl.SSLSocket, self.client.connection).unwrap() try: conn.shutdown(socket.SHUT_RDWR) logger.debug('Client connection shutdown successful') From b307e2c88dcfadf0edcb24ecfe997a9d53c9e2e3 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 17:48:19 -0700 Subject: [PATCH 28/54] Unwrap if wrapped before shutdown --- proxy.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/proxy.py b/proxy.py index 8ee6033c71..a2e1044467 100755 --- a/proxy.py +++ b/proxy.py @@ -2385,9 +2385,12 @@ def shutdown(self) -> None: 'at address %r with pending client buffer size %d bytes' % (self.client.connection, self.client.addr, self.client.buffer_size())) - if self.config.encryption_enabled(): - conn = cast(ssl.SSLSocket, self.client.connection).unwrap() + # Unwrap if wrapped before shutdown. 
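+        # -- Editor's note (sketch, not part of the patch): for a TLS-wrapped
+        # client, ssl.SSLSocket.unwrap() performs the close_notify exchange
+        # and returns the underlying plain socket, which is then shut down at
+        # the TCP level as usual:
+        #
+        #     import socket, ssl
+        #
+        #     def teardown(conn: socket.socket) -> None:
+        #         if isinstance(conn, ssl.SSLSocket):
+        #             try:
+        #                 conn = conn.unwrap()           # TLS close_notify
+        #             except (OSError, ssl.SSLError):
+        #                 pass                           # peer already gone
+        #         try:
+        #             conn.shutdown(socket.SHUT_WR)      # TCP half-close
+        #         finally:
+        #             conn.close()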
+ conn = self.client.connection try: + if self.config.encryption_enabled() and \ + isinstance(self.client.connection, ssl.SSLSocket): + conn = cast(ssl.SSLSocket, self.client.connection).unwrap() conn.shutdown(socket.SHUT_RDWR) logger.debug('Client connection shutdown successful') except OSError: From 77ad741cf6fab40d75aac66ff9fc3302420ba633 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 17:49:18 -0700 Subject: [PATCH 29/54] Unwrap if wrapped before shutdown --- proxy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proxy.py b/proxy.py index a2e1044467..3742851b7b 100755 --- a/proxy.py +++ b/proxy.py @@ -2390,7 +2390,7 @@ def shutdown(self) -> None: try: if self.config.encryption_enabled() and \ isinstance(self.client.connection, ssl.SSLSocket): - conn = cast(ssl.SSLSocket, self.client.connection).unwrap() + conn = self.client.connection.unwrap() conn.shutdown(socket.SHUT_RDWR) logger.debug('Client connection shutdown successful') except OSError: From 4db00045681d5e691c3e1c366062a34fc08da888 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 19:25:02 -0700 Subject: [PATCH 30/54] socket.SHUT_RDWR will cause leaks --- proxy.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/proxy.py b/proxy.py index 3742851b7b..2319449d3d 100755 --- a/proxy.py +++ b/proxy.py @@ -1490,10 +1490,15 @@ def on_client_connection_close(self) -> None: return self.access_log() # Invoke plugin.on_upstream_connection_close - if self.server and not self.server.closed: + if self.server: for plugin in self.plugins.values(): plugin.on_upstream_connection_close() - self.server.close() + try: + self.server.connection.shutdown(socket.SHUT_WR) + except OSError: + pass + finally: + self.server.connection.close() logger.debug( 'Closed server connection with pending server buffer size %d bytes' % self.server.buffer_size()) @@ -1883,7 +1888,7 @@ def run(self) -> None: finally: try: self.selector.unregister(self.sock) - self.sock.shutdown(socket.SHUT_RDWR) + self.sock.shutdown(socket.SHUT_WR) except Exception as e: logging.exception('Exception while shutdown of websocket client', exc_info=e) self.sock.close() @@ -2385,13 +2390,13 @@ def shutdown(self) -> None: 'at address %r with pending client buffer size %d bytes' % (self.client.connection, self.client.addr, self.client.buffer_size())) - # Unwrap if wrapped before shutdown. conn = self.client.connection try: + # Unwrap if wrapped before shutdown. 
if self.config.encryption_enabled() and \ isinstance(self.client.connection, ssl.SSLSocket): conn = self.client.connection.unwrap() - conn.shutdown(socket.SHUT_RDWR) + conn.shutdown(socket.SHUT_WR) logger.debug('Client connection shutdown successful') except OSError: pass From a6c8b7ad6ec09210d3bb8a8b9e02201e36e5afd0 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 19:26:37 -0700 Subject: [PATCH 31/54] MyPy --- proxy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/proxy.py b/proxy.py index 2319449d3d..a901e68458 100755 --- a/proxy.py +++ b/proxy.py @@ -1493,6 +1493,7 @@ def on_client_connection_close(self) -> None: if self.server: for plugin in self.plugins.values(): plugin.on_upstream_connection_close() + assert self.server try: self.server.connection.shutdown(socket.SHUT_WR) except OSError: From 28ebc62336dbee7c3fed9830b8855f55c328c892 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 19:44:25 -0700 Subject: [PATCH 32/54] Add instructions for monitor.sh --- monitor_open_files.sh => monitor.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) rename monitor_open_files.sh => monitor.sh (65%) diff --git a/monitor_open_files.sh b/monitor.sh similarity index 65% rename from monitor_open_files.sh rename to monitor.sh index 95d9af3f80..9ed93c1aa5 100755 --- a/monitor_open_files.sh +++ b/monitor.sh @@ -1,5 +1,18 @@ #!/bin/bash +# proxy.py +# ~~~~~~~~ +# ⚡⚡⚡ Fast, Lightweight, Programmable Proxy Server in a single Python file. +# +# :copyright: (c) 2013-present by Abhinav Singh and contributors. +# :license: BSD, see LICENSE for more details. +# +# Usage +# ./monitor +# +# Alternately, just run: +# watch -n 5 'lsof -i TCP:8899 | grep -v LISTEN' + PROXY_PY_PID=$1 if [ -z "$PROXY_PY_PID" ]; then echo "PROXY_PY_PID required as argument." From 6da580529acac31e386a7a452381635c987705ac Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 21:14:54 -0700 Subject: [PATCH 33/54] Avoid recursive exception in new_socket_connection and only invoke plugins/shutdown if server connection was initialized --- proxy.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/proxy.py b/proxy.py index a901e68458..56725a5d1f 100755 --- a/proxy.py +++ b/proxy.py @@ -327,6 +327,7 @@ def find_http_line(raw: bytes) -> Tuple[Optional[bytes], bytes]: def new_socket_connection(addr: Tuple[str, int]) -> socket.socket: + conn = None try: ip = ipaddress.ip_address(addr[0]) if ip.version == 4: @@ -338,10 +339,13 @@ def new_socket_connection(addr: Tuple[str, int]) -> socket.socket: socket.AF_INET6, socket.SOCK_STREAM, 0) conn.connect((addr[0], addr[1], 0, 0)) except ValueError: - # Not a valid IP address, most likely its a domain name, - # try to establish dual stack IPv4/IPv6 connection. - conn = socket.create_connection(addr) - return conn + pass # does not appear to be an IPv4 or IPv6 address + + if conn is not None: + return conn + + # try to establish dual stack IPv4/IPv6 connection. 
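+    # -- Editor's note (sketch, not part of the patch): the rewritten control
+    # flow now separates literal IPs (connect with the matching address
+    # family) from hostnames (ip_address() raises ValueError, so we fall
+    # through to socket.create_connection(), which walks getaddrinfo()
+    # results across both families). A compact standalone equivalent:
+    #
+    #     import ipaddress, socket
+    #
+    #     def connect(addr):
+    #         try:
+    #             version = ipaddress.ip_address(addr[0]).version
+    #         except ValueError:                 # hostname, not a literal IP
+    #             return socket.create_connection(addr)
+    #         family = socket.AF_INET if version == 4 else socket.AF_INET6
+    #         conn = socket.socket(family, socket.SOCK_STREAM, 0)
+    #         conn.connect(addr)
+    #         return conn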
+ return socket.create_connection(addr) class socket_connection(contextlib.ContextDecorator): @@ -1488,12 +1492,18 @@ def access_log(self) -> None: def on_client_connection_close(self) -> None: if not self.request.has_upstream_server(): return + self.access_log() + + # If server was never initialized, nothing to do + if not self.server or \ + not self.server.connection: + return + # Invoke plugin.on_upstream_connection_close - if self.server: - for plugin in self.plugins.values(): - plugin.on_upstream_connection_close() - assert self.server + for plugin in self.plugins.values(): + plugin.on_upstream_connection_close() + try: self.server.connection.shutdown(socket.SHUT_WR) except OSError: From 2b376a6f2678163f91fe4a13c1f13790ca06e107 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 21:24:51 -0700 Subject: [PATCH 34/54] Add Fast & Scalable section --- README.md | 68 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 781491499b..538d402e10 100644 --- a/README.md +++ b/README.md @@ -49,9 +49,16 @@ Table of Contents * [End-to-End Encryption](#end-to-end-encryption) * [TLS Interception](#tls-interception) * [import proxy.py](#import-proxypy) - * [proxy.new_socket_connection](#proxynew_socket_connection) - * [proxy.socket_connection](#proxysocket_connection) - * [proxy.build_http_request](#proxybuild_http_request) + * [TCP Sockets](#tcp-sockets) + * [proxy.new_socket_connection](#proxynew_socket_connection) + * [proxy.socket_connection](#proxysocket_connection) + * [Http Client](#http-client) + * [proxy.build_http_request](#proxybuild_http_request) + * [proxy.build_http_response](#proxybuild_http_response) + * [Websocket Client](#websocket-client) + * [proxy.WebsocketFrame](#proxywebsocketframe) + * [proxy.WebsocketClient](#proxywebsocketclient) + * [Embed proxy.py](#embed-proxypy) * [Plugin Developer and Contributor Guide](#plugin-developer-and-contributor-guide) * [Everything is a plugin](#everything-is-a-plugin) * [Internal Architecture](#internal-architecture) @@ -68,6 +75,29 @@ Table of Contents Features ======== +- Fast & Scalable + - Utilizes all available cores on the system + - Threadless executions using coroutine + - Made to handle `tens-of-thousands` connections / sec + ``` + # On Macbook Pro 2015 / 2.8 GHz Intel Core i7 + $ hey -n 10000 -c 100 http://localhost:8899/ + + Summary: + Total: 0.6157 secs + Slowest: 0.1049 secs + Fastest: 0.0007 secs + Average: 0.0055 secs + Requests/sec: 16240.5444 + + Total data: 800000 bytes + Size/request: 80 bytes + + Response time histogram: + 0.001 [1] | + 0.011 [9565] |■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ + 0.022 [332] |■ + ``` - Lightweight - Distributed as a single file module `~100KB` - Uses only `~5-20MB` RAM @@ -599,7 +629,9 @@ $ python >>> ``` -## proxy.new_socket_connection +## TCP Sockets + +### proxy.new_socket_connection Attempts to create an IPv4 connection, then IPv6 and finally a dual stack connection to provided address. @@ -610,7 +642,7 @@ finally a dual stack connection to provided address. >>> conn.close() ``` -## proxy.socket_connection +### proxy.socket_connection `socket_connection` is a convenient decorator + context manager around `new_socket_connection` which ensures `conn.close` is implicit. @@ -630,9 +662,11 @@ As a decorator: >>> ... [ use connection ] ... 
``` -## proxy.build_http_request +## Http Client + +### proxy.build_http_request -#### Generate HTTP GET request +##### Generate HTTP GET request ``` >>> proxy.build_http_request(b'GET', b'/') @@ -640,7 +674,7 @@ b'GET / HTTP/1.1\r\n\r\n' >>> ``` -#### Generate HTTP GET request with headers +##### Generate HTTP GET request with headers ``` >>> proxy.build_http_request(b'GET', b'/', @@ -649,7 +683,7 @@ b'GET / HTTP/1.1\r\nConnection: close\r\n\r\n' >>> ``` -#### Generate HTTP POST request with headers and body +##### Generate HTTP POST request with headers and body ``` >>> import json @@ -659,6 +693,22 @@ b'GET / HTTP/1.1\r\nConnection: close\r\n\r\n' b'POST /form HTTP/1.1\r\nContent-type: application/json\r\n\r\n{"email": "hello@world.com"}' ``` +### proxy.build_http_response + +TODO + +## Websocket Client + +### proxy.WebsocketFrame + +TODO + +### proxy.WebsocketClient + +TODO + +## Embed proxy.py + To start `proxy.py` server from imported `proxy.py` module, simply do: ``` From d2ba8c8110d6e4c425d4c54795f559cf77ec2eb4 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 21:29:08 -0700 Subject: [PATCH 35/54] Update internal classes section --- README.md | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 538d402e10..01731d45c1 100644 --- a/README.md +++ b/README.md @@ -798,33 +798,23 @@ Example: ``` $ pydoc3 proxy -Help on module proxy: - -NAME - proxy - -DESCRIPTION - proxy.py - ~~~~~~~~ - Lightweight, Programmable, TLS interceptor Proxy for HTTP(S), HTTP2, WebSockets protocols in a single Python file. - - :copyright: (c) 2013-present by Abhinav Singh and contributors. - :license: BSD, see LICENSE for more details. CLASSES abc.ABC(builtins.object) HttpProxyBasePlugin HttpWebServerBasePlugin - DevtoolsFrontendPlugin + DevtoolsWebsocketPlugin HttpWebServerPacFilePlugin ProtocolHandlerPlugin - DevtoolsEventGeneratorPlugin + DevtoolsProtocolPlugin HttpProxyPlugin HttpWebServerPlugin TcpConnection TcpClientConnection TcpServerConnection WebsocketClient + ThreadlessWork + ProtocolHandler(threading.Thread, ThreadlessWork) builtins.Exception(builtins.BaseException) ProtocolException HttpRequestRejected @@ -839,17 +829,20 @@ CLASSES WebsocketFrame builtins.tuple(builtins.object) ChunkParserStates + HttpMethods HttpParserStates HttpParserTypes HttpProtocolTypes + HttpStatusCodes TcpConnectionTypes WebsocketOpcodes contextlib.ContextDecorator(builtins.object) socket_connection multiprocessing.context.Process(multiprocessing.process.BaseProcess) Acceptor + Threadless threading.Thread(builtins.object) - ProtocolHandler + ProtocolHandler(threading.Thread, ThreadlessWork) ``` Frequently Asked Questions From 6664409bfe48cb230b431004d3204c0c1b037f8e Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 14 Oct 2019 21:43:40 -0700 Subject: [PATCH 36/54] Dont print out local dir path in help text :) --- README.md | 9 ++++----- proxy.py | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 01731d45c1..1ec7bcc910 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ Features ======== - Fast & Scalable - - Utilizes all available cores on the system + - Scales by using all available cores on the system - Threadless executions using coroutine - Made to handle `tens-of-thousands` connections / sec ``` @@ -1034,10 +1034,9 @@ optional arguments: value for faster downloads at the expense of increased RAM. --static-server-dir STATIC_SERVER_DIR - Default: /Users/abhinav/Dev/proxy.py/public. 
Static - server root directory. This option is only applicable - when static server is also enabled. See --enable- - static-server. + Default: "public" folder in directory where proxy.py + is placed. This option is only applicable when static + server is also enabled. See --enable-static-server. --threadless Default: False. When disabled a new thread is spawned to handle each client connection. --timeout TIMEOUT Default: 10. Number of seconds after which an inactive diff --git a/proxy.py b/proxy.py index 56725a5d1f..c0a1f7684b 100755 --- a/proxy.py +++ b/proxy.py @@ -2977,7 +2977,7 @@ def init_parser() -> argparse.ArgumentParser: '--static-server-dir', type=str, default=DEFAULT_STATIC_SERVER_DIR, - help='Default: ' + DEFAULT_STATIC_SERVER_DIR + '. Static server root directory. ' + help='Default: "public" folder in directory where proxy.py is placed. ' 'This option is only applicable when static server is also enabled. ' 'See --enable-static-server.' ) From bfeee65bf3d2b7d119a5b7d02788611d0505eb24 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 00:06:37 -0700 Subject: [PATCH 37/54] Refactor --- monitor.sh | 2 +- proxy.py | 124 +++++++++++++++++++++++++++++++++-------------------- 2 files changed, 79 insertions(+), 47 deletions(-) diff --git a/monitor.sh b/monitor.sh index 9ed93c1aa5..a5001d002b 100755 --- a/monitor.sh +++ b/monitor.sh @@ -11,7 +11,7 @@ # ./monitor # # Alternately, just run: -# watch -n 5 'lsof -i TCP:8899 | grep -v LISTEN' +# watch -n 1 'lsof -i TCP:8899 | grep -v LISTEN' PROXY_PY_PID=$1 if [ -z "$PROXY_PY_PID" ]; then diff --git a/proxy.py b/proxy.py index c0a1f7684b..4d1c559f21 100755 --- a/proxy.py +++ b/proxy.py @@ -417,6 +417,7 @@ def recv(self, buffer_size: int = DEFAULT_BUFFER_SIZE) -> Optional[bytes]: logger.debug( 'received %d bytes from %s' % (len(data), self.tag)) + logger.debug(data) return data except socket.error as e: if e.errno == errno.ECONNRESET: @@ -449,6 +450,7 @@ def flush(self) -> int: if self.closed: raise BrokenPipeError() sent: int = self.send(self.buffer) + logger.debug(self.buffer[:sent]) self.buffer = self.buffer[sent:] logger.debug('flushed %d bytes to %s' % (sent, self.tag)) return sent @@ -847,6 +849,10 @@ class ThreadlessWork(ABC): def initialize(self) -> None: pass # pragma: no cover + @abstractmethod + def is_inactive(self) -> bool: + return False # pragma: no cover + @abstractmethod def get_events(self) -> Dict[socket.socket, int]: return {} # pragma: no cover @@ -921,40 +927,68 @@ async def handle_events( # TODO: Use correct future typing annotations async def wait_for_tasks( self, tasks: Dict[int, Any]) -> None: - for fileno in tasks: - teardown = await tasks[fileno] - if teardown: - # TODO: Shutdown can block too - # Currently calls flush which can use - # ProtocolHandler.selector - self.works[fileno].shutdown() - del self.works[fileno] + for work_id in tasks: + # TODO: Resolving one handle_events here can block resolution of other tasks + try: + teardown = await asyncio.wait_for(tasks[work_id], DEFAULT_TIMEOUT) + if teardown: + self.cleanup(work_id) + except asyncio.TimeoutError: + self.cleanup(work_id) + + def accept_client(self) -> None: + addr = self.client_queue.recv() + fileno = recv_handle(self.client_queue) + self.works[fileno] = self.work_klass( + fileno=fileno, + addr=addr, + **self.kwargs) + try: + self.works[fileno].initialize() + except ssl.SSLError as e: + logger.exception('ssl.SSLError', exc_info=e) + self.cleanup(fileno) + + def cleanup_inactive(self) -> None: + inactive_works: List[int] = [] + for 
work_id in self.works: + if self.works[work_id].is_inactive(): + inactive_works.append(work_id) + for work_id in inactive_works: + self.cleanup(work_id) + + def cleanup(self, work_id: int) -> None: + # TODO: ProtocolHandler.shutdown can call flush which may block + self.works[work_id].shutdown() + del self.works[work_id] def run_once(self) -> None: assert self.loop is not None + readables: List[Union[int, _HasFileno]] = [] + writables: List[Union[int, _HasFileno]] = [] with self.selected_events() as (readables, writables): if len(readables) == 0 and len(writables) == 0: + # Remove and shutdown inactive connections + self.cleanup_inactive() return - # TODO: Only send readable / writables that client originally registered. - tasks = {} - for fileno in self.works: - tasks[fileno] = self.loop.create_task( - self.handle_events(fileno, readables, writables)) - # Receive accepted client connection - if self.client_queue in readables: - addr = self.client_queue.recv() - fileno = recv_handle(self.client_queue) - self.works[fileno] = self.work_klass( - fileno=fileno, - addr=addr, - **self.kwargs) - try: - self.works[fileno].initialize() - except ssl.SSLError as e: - logger.exception('ssl.SSLError', exc_info=e) - self.works[fileno].shutdown() - del self.works[fileno] - self.loop.run_until_complete(self.wait_for_tasks(tasks)) + # Note that selector from now on is idle, + # until all the logic below completes. + # + # Invoke Threadless.handle_events + # TODO: Only send readable / writables that client originally registered. + tasks = {} + for fileno in self.works: + logger.debug('Creating task for %s', fileno) + tasks[fileno] = self.loop.create_task( + self.handle_events(fileno, readables, writables)) + # Accepted client connection from Acceptor + if self.client_queue in readables: + logger.debug('Accepting client') + self.accept_client() + # Wait for Threadless.handle_events to complete + self.loop.run_until_complete(self.wait_for_tasks(tasks)) + # Remove and shutdown inactive connections + self.cleanup_inactive() def run(self) -> None: try: @@ -2325,6 +2359,12 @@ def initialize(self) -> None: self.plugins[instance.name()] = instance logger.debug('Handling connection %r' % self.client.connection) + def is_inactive(self) -> bool: + if not self.client.has_buffer() and \ + self.connection_inactive_for() > self.config.timeout: + return True + return False + def get_events(self) -> Dict[socket.socket, int]: events: Dict[socket.socket, int] = { self.client.connection: selectors.EVENT_READ @@ -2348,7 +2388,10 @@ def get_events(self) -> Dict[socket.socket, int]: return events - def handle_events(self, readables: List[Union[int, _HasFileno]], writables: List[Union[int, _HasFileno]]) -> bool: + def handle_events( + self, + readables: List[Union[int, _HasFileno]], + writables: List[Union[int, _HasFileno]]) -> bool: """Returns True if proxy must teardown.""" # Flush buffer for ready to write sockets teardown = self.handle_writables(writables) @@ -2372,20 +2415,6 @@ def handle_events(self, readables: List[Union[int, _HasFileno]], writables: List if teardown: return True - # Teardown if client buffer is empty and connection is inactive - if not self.client.has_buffer() and \ - self.is_connection_inactive(): - self.client.queue(build_http_response( - httpStatusCodes.REQUEST_TIMEOUT, reason=b'Request Timeout', - headers={ - b'Server': PROXY_AGENT_HEADER_VALUE, - b'Connection': b'close', - } - )) - logger.debug( - 'Client buffer is empty and maximum inactivity has reached ' - 'between client and server connection, 
tearing down...') - return True return False def shutdown(self) -> None: @@ -2441,9 +2470,6 @@ def optionally_wrap_socket( def connection_inactive_for(self) -> float: return time.time() - self.last_activity - def is_connection_inactive(self) -> bool: - return self.connection_inactive_for() > self.config.timeout - def flush(self) -> None: if not self.client.has_buffer(): return @@ -2563,6 +2589,12 @@ def run(self) -> None: try: self.initialize() while True: + # Teardown if client buffer is empty and connection is inactive + if self.is_inactive(): + logger.debug( + 'Client buffer is empty and maximum inactivity has reached ' + 'between client and server connection, tearing down...') + break teardown = self.run_once() if teardown: break From 44db7496f32201e6a4758d84e74e43c5d9266ca3 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 12:51:16 -0700 Subject: [PATCH 38/54] Fix a bug where response parser for HTTP only requests was reused for pipelined requests resulting in a hang --- proxy.py | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/proxy.py b/proxy.py index 4d1c559f21..4743c53eee 100755 --- a/proxy.py +++ b/proxy.py @@ -417,7 +417,7 @@ def recv(self, buffer_size: int = DEFAULT_BUFFER_SIZE) -> Optional[bytes]: logger.debug( 'received %d bytes from %s' % (len(data), self.tag)) - logger.debug(data) + # logger.debug(data) return data except socket.error as e: if e.errno == errno.ECONNRESET: @@ -450,7 +450,7 @@ def flush(self) -> int: if self.closed: raise BrokenPipeError() sent: int = self.send(self.buffer) - logger.debug(self.buffer[:sent]) + # logger.debug(self.buffer[:sent]) self.buffer = self.buffer[sent:] logger.debug('flushed %d bytes to %s' % (sent, self.tag)) return sent @@ -978,12 +978,10 @@ def run_once(self) -> None: # TODO: Only send readable / writables that client originally registered. 
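                # -- Editor's note (sketch, not part of this patch): the
                # wait_for_tasks() rewrite in PATCH 37 above bounds each await
                # with asyncio.wait_for(), so one stuck handler can no longer
                # stall the whole Threadless loop; a timed-out work item is
                # cleaned up exactly as if it had requested teardown:
                #
                #     import asyncio
                #
                #     async def await_with_deadline(task, timeout: float) -> bool:
                #         # True means: clean this work item up.
                #         try:
                #             return await asyncio.wait_for(task, timeout)
                #         except asyncio.TimeoutError:
                #             return True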
tasks = {} for fileno in self.works: - logger.debug('Creating task for %s', fileno) tasks[fileno] = self.loop.create_task( self.handle_events(fileno, readables, writables)) # Accepted client connection from Acceptor if self.client_queue in readables: - logger.debug('Accepting client') self.accept_client() # Wait for Threadless.handle_events to complete self.loop.run_until_complete(self.wait_for_tasks(tasks)) @@ -1438,6 +1436,7 @@ def __init__( self.server: Optional[TcpServerConnection] = None self.response: HttpParser = HttpParser(httpParserTypes.RESPONSE_PARSER) self.pipeline_request: Optional[HttpParser] = None + self.pipeline_response: Optional[HttpParser] = None self.plugins: Dict[str, HttpProxyBasePlugin] = {} if b'HttpProxyBasePlugin' in self.config.plugins: @@ -1476,7 +1475,7 @@ def write_to_descriptors(self, w: List[Union[int, _HasFileno]]) -> bool: def read_from_descriptors(self, r: List[Union[int, _HasFileno]]) -> bool: if self.request.has_upstream_server( ) and self.server and not self.server.closed and self.server.connection in r: - logger.debug('Server is ready for reads, reading') + logger.debug('Server is ready for reads, reading...') raw = self.server.recv(self.config.server_recvbuf_size) if not raw: logger.debug('Server closed connection, tearing down...') @@ -1490,7 +1489,14 @@ def read_from_descriptors(self, r: List[Union[int, _HasFileno]]) -> bool: # tls interception is enabled if self.request.method != httpMethods.CONNECT or \ self.config.tls_interception_enabled(): - self.response.parse(raw) + if self.response.state == httpParserStates.COMPLETE: + if self.pipeline_response is None: + self.pipeline_response = HttpParser(httpParserTypes.RESPONSE_PARSER) + self.pipeline_response.parse(raw) + if self.pipeline_response.state == httpParserStates.COMPLETE: + self.pipeline_response = None + else: + self.response.parse(raw) else: self.response.total_size += len(raw) # queue raw data for client @@ -1529,21 +1535,26 @@ def on_client_connection_close(self) -> None: self.access_log() - # If server was never initialized, nothing to do - if not self.server or \ - not self.server.connection: + # If server was never initialized, return + if self.server is None: return + # Note that, server instance was initialized + # but not necessarily the connection object exists. 
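# -- Editor's note: a sketch (not part of the patch) of the parsing rule this
# fix encodes. The hang came from feeding a second, pipelined response into a
# HttpParser that had already reached the COMPLETE state, so those bytes were
# never consumed as a new message; once a parser completes, further bytes must
# start a fresh one. Assuming proxy.py is importable:
import proxy

def feed(current: proxy.HttpParser, raw: bytes) -> proxy.HttpParser:
    if current.state == proxy.httpParserStates.COMPLETE:
        # Previous response finished: raw belongs to the next pipelined one.
        current = proxy.HttpParser(proxy.httpParserTypes.RESPONSE_PARSER)
    current.parse(raw)
    return current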
# Invoke plugin.on_upstream_connection_close for plugin in self.plugins.values(): plugin.on_upstream_connection_close() try: - self.server.connection.shutdown(socket.SHUT_WR) - except OSError: + try: + self.server.connection.shutdown(socket.SHUT_WR) + except OSError: + pass + finally: + self.server.connection.close() + except TcpConnectionUninitializedException: pass finally: - self.server.connection.close() logger.debug( 'Closed server connection with pending server buffer size %d bytes' % self.server.buffer_size()) @@ -1564,9 +1575,6 @@ def on_client_data(self, raw: bytes) -> Optional[bytes]: return raw if self.server and not self.server.closed: - # If 1st request did reach completion stage - # and 1st request was not a CONNECT request - # or if TLS interception was enabled if self.request.state == httpParserStates.COMPLETE and ( self.request.method != httpMethods.CONNECT or self.config.tls_interception_enabled()): From 61f88ff4ceeb9fa406cb14bb254b4b7695a52920 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 12:55:29 -0700 Subject: [PATCH 39/54] Add chrome_with_proxy.sh helper script --- chrome_with_proxy.sh | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 chrome_with_proxy.sh diff --git a/chrome_with_proxy.sh b/chrome_with_proxy.sh new file mode 100755 index 0000000000..d77ea0073d --- /dev/null +++ b/chrome_with_proxy.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \ + --no-first-run \ + --no-default-browser-check \ + --user-data-dir="$(mktemp -d -t 'chrome-remote_data_dir')" \ + --proxy-server=localhost:8899 From d3ccea78db1117fece2840fdcd6a82e23ab778c6 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 13:32:08 -0700 Subject: [PATCH 40/54] Handle OSError during client.flush which can happen due to invalid protocol type for socket error --- proxy.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/proxy.py b/proxy.py index 4743c53eee..17890d98d5 100755 --- a/proxy.py +++ b/proxy.py @@ -2507,6 +2507,9 @@ def handle_writables(self, writables: List[Union[int, _HasFileno]]) -> bool: try: self.client.flush() + except OSError as e: + logger.error('OSError when flushing buffer to client') + return True except BrokenPipeError: logger.error( 'BrokenPipeError when flushing buffer for client') From 8fe7f99d69f2e53ab496ec2bbb9bd40ca28cc6a5 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 13:33:14 -0700 Subject: [PATCH 41/54] Remove redundant e --- proxy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proxy.py b/proxy.py index 17890d98d5..3545f5f482 100755 --- a/proxy.py +++ b/proxy.py @@ -2507,7 +2507,7 @@ def handle_writables(self, writables: List[Union[int, _HasFileno]]) -> bool: try: self.client.flush() - except OSError as e: + except OSError: logger.error('OSError when flushing buffer to client') return True except BrokenPipeError: From d27d8649d9ac15b2eab984c125bac8a956f5b78f Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 13:45:31 -0700 Subject: [PATCH 42/54] Add classmethods to quickly construct a parser object --- proxy.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/proxy.py b/proxy.py index 3545f5f482..f004fb5cec 100755 --- a/proxy.py +++ b/proxy.py @@ -39,7 +39,7 @@ from multiprocessing import connection from multiprocessing.reduction import send_handle, recv_handle from types import TracebackType -from typing import Any, Dict, List, Tuple, Optional, Union, NamedTuple, Callable, Type 
+from typing import Any, Dict, List, Tuple, Optional, Union, NamedTuple, Callable, Type, TypeVar from typing import cast, Generator, TYPE_CHECKING from urllib import parse as urlparse @@ -551,6 +551,9 @@ def to_chunks(raw: bytes, chunk_size: int = DEFAULT_BUFFER_SIZE) -> bytes: return CRLF.join(chunks) + CRLF +T = TypeVar('T', bound='HttpParser') + + class HttpParser: """HTTP request/response parser.""" @@ -583,6 +586,18 @@ def __init__(self, parser_type: int) -> None: self.port: Optional[int] = None self.path: Optional[bytes] = None + @classmethod + def request(cls: Type[T], raw: bytes) -> T: + parser = cls(httpParserTypes.REQUEST_PARSER) + parser.parse(raw) + return parser + + @classmethod + def response(cls: Type[T], raw: bytes) -> T: + parser = cls(httpParserTypes.RESPONSE_PARSER) + parser.parse(raw) + return parser + def header(self, key: bytes) -> bytes: if key.lower() not in self.headers: raise KeyError('%s not found in headers', text_(key)) From f7e81232f92a0044e489cca6758924e1f4aabfdd Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 16:49:32 -0700 Subject: [PATCH 43/54] Don't raise from TcpConnection abstract class. This allows both client/socket side of communication to handle exceptions as necessary. We might refactor this again later to remove redundant code :) --- chrome_with_proxy.sh | 7 -- monitor.sh => monitor_proxy_open_files.sh | 0 proxy.py | 130 ++++++++++++++-------- start_chrome_with_proxy.sh | 24 ++++ tests.py | 29 ----- 5 files changed, 109 insertions(+), 81 deletions(-) delete mode 100755 chrome_with_proxy.sh rename monitor.sh => monitor_proxy_open_files.sh (100%) create mode 100755 start_chrome_with_proxy.sh diff --git a/chrome_with_proxy.sh b/chrome_with_proxy.sh deleted file mode 100755 index d77ea0073d..0000000000 --- a/chrome_with_proxy.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \ - --no-first-run \ - --no-default-browser-check \ - --user-data-dir="$(mktemp -d -t 'chrome-remote_data_dir')" \ - --proxy-server=localhost:8899 diff --git a/monitor.sh b/monitor_proxy_open_files.sh similarity index 100% rename from monitor.sh rename to monitor_proxy_open_files.sh diff --git a/proxy.py b/proxy.py index f004fb5cec..39c9f80ee0 100755 --- a/proxy.py +++ b/proxy.py @@ -411,22 +411,15 @@ def send(self, data: bytes) -> int: return self.connection.send(data) def recv(self, buffer_size: int = DEFAULT_BUFFER_SIZE) -> Optional[bytes]: - try: - data: bytes = self.connection.recv(buffer_size) - if len(data) > 0: - logger.debug( - 'received %d bytes from %s' % - (len(data), self.tag)) - # logger.debug(data) - return data - except socket.error as e: - if e.errno == errno.ECONNRESET: - logger.debug('%r' % e) - else: - logger.exception( - 'Exception while receiving from connection %s %r with reason %r' % - (self.tag, self.connection, e)) - return None + """Users must handle socket.error exceptions""" + data: bytes = self.connection.recv(buffer_size) + if len(data) == 0: + return None + logger.debug( + 'received %d bytes from %s' % + (len(data), self.tag)) + # logger.debug(data) + return data def close(self) -> bool: if not self.closed: @@ -445,10 +438,9 @@ def queue(self, data: bytes) -> int: return len(data) def flush(self) -> int: + """Users must handle BrokenPipeError exceptions""" if self.buffer_size() == 0: return 0 - if self.closed: - raise BrokenPipeError() sent: int = self.send(self.buffer) # logger.debug(self.buffer[:sent]) self.buffer = self.buffer[sent:] @@ -1481,6 +1473,9 @@ def 
write_to_descriptors(self, w: List[Union[int, _HasFileno]]) -> bool: logger.debug('Server is write ready, flushing buffer') try: self.server.flush() + except OSError: + logger.error('OSError when flushing buffer to server') + return True except BrokenPipeError: logger.error( 'BrokenPipeError when flushing buffer for server') @@ -1491,7 +1486,22 @@ def read_from_descriptors(self, r: List[Union[int, _HasFileno]]) -> bool: if self.request.has_upstream_server( ) and self.server and not self.server.closed and self.server.connection in r: logger.debug('Server is ready for reads, reading...') - raw = self.server.recv(self.config.server_recvbuf_size) + raw: Optional[bytes] = None + + try: + raw = self.server.recv(self.config.server_recvbuf_size) + except ssl.SSLWantReadError: # Try again later + logger.warning('SSLWantReadError encountered while reading from server, will retry ...') + return False + except socket.error as e: + if e.errno == errno.ECONNRESET: + logger.warning('Connection reset by upstream: %r' % e) + else: + logger.exception( + 'Exception while receiving from %s connection %r with reason %r' % + (self.server.tag, self.server.connection, e)) + return True + if not raw: logger.debug('Server closed connection, tearing down...') return True @@ -1566,6 +1576,7 @@ def on_client_connection_close(self) -> None: except OSError: pass finally: + # TODO: Unwrap if wrapped before close? self.server.connection.close() except TcpConnectionUninitializedException: pass @@ -1645,6 +1656,34 @@ def generate_upstream_certificate(self, _certificate: Optional[Dict[str, Any]]) sign_cert.communicate(timeout=10) return cert_file_path + def wrap_server(self) -> None: + assert self.server is not None + assert isinstance(self.server.connection, socket.socket) + ctx = ssl.create_default_context( + ssl.Purpose.SERVER_AUTH) + ctx.options |= ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1 + self.server.connection.setblocking(True) + self.server._conn = ctx.wrap_socket( + self.server.connection, + server_hostname=text_(self.request.host)) + self.server.connection.setblocking(False) + + def wrap_client(self) -> None: + assert self.server is not None + assert isinstance(self.server.connection, ssl.SSLSocket) + generated_cert = self.generate_upstream_certificate( + cast(Dict[str, Any], self.server.connection.getpeercert())) + self.client.connection.setblocking(True) + self.client.flush() + self.client._conn = ssl.wrap_socket( + self.client.connection, + server_side=True, + keyfile=self.config.ca_signing_key_file, + certfile=generated_cert) + self.client.connection.setblocking(False) + logger.info( + 'TLS interception using %s', generated_cert) + def on_request_complete(self) -> Union[socket.socket, bool]: if not self.request.has_upstream_server(): return False @@ -1677,31 +1716,20 @@ def on_request_complete(self) -> Union[socket.socket, bool]: HttpProxyPlugin.PROXY_TUNNEL_ESTABLISHED_RESPONSE_PKT) # If interception is enabled if self.config.tls_interception_enabled(): - assert self.server is not None - assert isinstance(self.server.connection, socket.socket) # Perform SSL/TLS handshake with upstream - ctx = ssl.create_default_context( - ssl.Purpose.SERVER_AUTH) - ctx.options |= ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1 - self.server.connection.setblocking(True) - self.server._conn = ctx.wrap_socket( - self.server.connection, - server_hostname=text_(self.request.host)) - self.server.connection.setblocking(False) - assert isinstance(self.server.connection, 
ssl.SSLSocket) + self.wrap_server() # Generate certificate and perform handshake with client - generated_cert = self.generate_upstream_certificate( - cast(Dict[str, Any], self.server.connection.getpeercert())) - self.client.flush() - self.client.connection.setblocking(True) - self.client._conn = ssl.wrap_socket( - self.client.connection, - server_side=True, - keyfile=self.config.ca_signing_key_file, - certfile=generated_cert) - self.client.connection.setblocking(False) - logger.info( - 'TLS interception using %s', generated_cert) + try: + # wrap_client also flushes client data before wrapping + # sending to client can raise, handle expected exceptions + self.wrap_client() + except OSError: + logger.error('OSError when wrapping client') + return True + except BrokenPipeError: + logger.error( + 'BrokenPipeError when wrapping client') + return True # Update all plugin connection reference for plugin in self.plugins.values(): plugin.client._conn = self.client.connection @@ -2535,8 +2563,22 @@ def handle_readables(self, readables: List[Union[int, _HasFileno]]) -> bool: if self.client.connection in readables: logger.debug('Client is ready for reads, reading') self.last_activity = time.time() + client_data: Optional[bytes] = None + + try: + client_data = self.client.recv(self.config.client_recvbuf_size) + except ssl.SSLWantReadError: # Try again later + logger.warning('SSLWantReadError encountered while reading from server, will retry ...') + return False + except socket.error as e: + if e.errno == errno.ECONNRESET: + logger.warning('Connection reset by upstream: %r' % e) + else: + logger.exception( + 'Exception while receiving from %s connection %r with reason %r' % + (self.client.tag, self.client.connection, e)) + return True - client_data = self.client.recv(self.config.client_recvbuf_size) if not client_data: logger.debug('Client closed connection, tearing down...') self.client.closed = True @@ -2571,8 +2613,6 @@ def handle_readables(self, readables: List[Union[int, _HasFileno]]) -> bool: for plugin_ in self.plugins.values(): if plugin_ != plugin: plugin_.client._conn = upgraded_sock - logger.debug( - 'Upgraded client conn for plugin %s', str(plugin_)) elif isinstance(upgraded_sock, bool) and upgraded_sock is True: return True except ProtocolException as e: diff --git a/start_chrome_with_proxy.sh b/start_chrome_with_proxy.sh new file mode 100755 index 0000000000..66e00cb87d --- /dev/null +++ b/start_chrome_with_proxy.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# proxy.py +# ~~~~~~~~ +# ⚡⚡⚡ Fast, Lightweight, Programmable Proxy Server in a single Python file. +# +# :copyright: (c) 2013-present by Abhinav Singh and contributors. +# :license: BSD, see LICENSE for more details. 
+# +# Usage +# ./chrome_with_proxy + +PROXY_PY_ADDR=$1 +if [ -z "$PROXY_PY_ADDR" ]; then + PROXY_PY_ADDR="localhost:8899" +fi + +/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \ + --no-first-run \ + --no-default-browser-check \ + --user-data-dir="$(mktemp -d -t 'chrome-remote_data_dir')" \ + --proxy-server=$PROXY_PY_ADDR \ + --ignore-urlfetcher-cert-requests \ + --ignore-certificate-errors diff --git a/tests.py b/tests.py index ac76487ada..47d24e51bb 100644 --- a/tests.py +++ b/tests.py @@ -99,13 +99,6 @@ def connection(self) -> Union[ssl.SSLSocket, socket.socket]: raise proxy.TcpConnectionUninitializedException() return self._conn - def testFlushThrowsBrokenPipeIfClosed(self) -> None: - self.conn = TestTcpConnection.TcpConnectionToTest() - self.conn.queue(b'some data') - self.conn.closed = True - with self.assertRaises(BrokenPipeError): - self.conn.flush() - def testThrowsKeyErrorIfNoConn(self) -> None: self.conn = TestTcpConnection.TcpConnectionToTest() with self.assertRaises(proxy.TcpConnectionUninitializedException): @@ -115,28 +108,6 @@ def testThrowsKeyErrorIfNoConn(self) -> None: with self.assertRaises(proxy.TcpConnectionUninitializedException): self.conn.close() - def testHandlesIOError(self) -> None: - _conn = mock.MagicMock() - _conn.recv.side_effect = IOError() - self.conn = TestTcpConnection.TcpConnectionToTest(_conn) - with mock.patch('proxy.logger') as mock_logger: - self.conn.recv() - mock_logger.exception.assert_called() - logging.info(mock_logger.exception.call_args[0][0].startswith( - 'Exception while receiving from connection')) - - def testHandlesConnReset(self) -> None: - _conn = mock.MagicMock() - e = IOError() - e.errno = errno.ECONNRESET - _conn.recv.side_effect = e - self.conn = TestTcpConnection.TcpConnectionToTest(_conn) - with mock.patch('proxy.logger') as mock_logger: - self.conn.recv() - mock_logger.exception.assert_not_called() - mock_logger.debug.assert_called() - self.assertEqual(mock_logger.debug.call_args[0][0], '%r' % e) - def testClosesIfNotClosed(self) -> None: _conn = mock.MagicMock() self.conn = TestTcpConnection.TcpConnectionToTest(_conn) From aee480c3b2a0fb1f4b4a0138dde2f8106ea66f26 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 18:15:12 -0700 Subject: [PATCH 44/54] Disable response parsing when TLS interception is enabled. 
See issue #127 --- proxy.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/proxy.py b/proxy.py index 39c9f80ee0..83f5278b69 100755 --- a/proxy.py +++ b/proxy.py @@ -418,7 +418,7 @@ def recv(self, buffer_size: int = DEFAULT_BUFFER_SIZE) -> Optional[bytes]: logger.debug( 'received %d bytes from %s' % (len(data), self.tag)) - # logger.debug(data) + # logger.info(data) return data def close(self) -> bool: @@ -442,7 +442,7 @@ def flush(self) -> int: if self.buffer_size() == 0: return 0 sent: int = self.send(self.buffer) - # logger.debug(self.buffer[:sent]) + # logger.info(self.buffer[:sent]) self.buffer = self.buffer[sent:] logger.debug('flushed %d bytes to %s' % (sent, self.tag)) return sent @@ -1491,7 +1491,7 @@ def read_from_descriptors(self, r: List[Union[int, _HasFileno]]) -> bool: try: raw = self.server.recv(self.config.server_recvbuf_size) except ssl.SSLWantReadError: # Try again later - logger.warning('SSLWantReadError encountered while reading from server, will retry ...') + # logger.warning('SSLWantReadError encountered while reading from server, will retry ...') return False except socket.error as e: if e.errno == errno.ECONNRESET: @@ -1512,8 +1512,11 @@ def read_from_descriptors(self, r: List[Union[int, _HasFileno]]) -> bool: # parse incoming response packet # only for non-https requests and when # tls interception is enabled - if self.request.method != httpMethods.CONNECT or \ - self.config.tls_interception_enabled(): + if self.request.method != httpMethods.CONNECT: + # See https://github.com/abhinavsingh/proxy.py/issues/127 for why + # currently response parsing is disabled when TLS interception is enabled. + # + # or self.config.tls_interception_enabled(): if self.response.state == httpParserStates.COMPLETE: if self.pipeline_response is None: self.pipeline_response = HttpParser(httpParserTypes.RESPONSE_PARSER) @@ -1681,7 +1684,7 @@ def wrap_client(self) -> None: keyfile=self.config.ca_signing_key_file, certfile=generated_cert) self.client.connection.setblocking(False) - logger.info( + logger.debug( 'TLS interception using %s', generated_cert) def on_request_complete(self) -> Union[socket.socket, bool]: @@ -2568,11 +2571,11 @@ def handle_readables(self, readables: List[Union[int, _HasFileno]]) -> bool: try: client_data = self.client.recv(self.config.client_recvbuf_size) except ssl.SSLWantReadError: # Try again later - logger.warning('SSLWantReadError encountered while reading from server, will retry ...') + logger.warning('SSLWantReadError encountered while reading from client, will retry ...') return False except socket.error as e: if e.errno == errno.ECONNRESET: - logger.warning('Connection reset by upstream: %r' % e) + logger.warning('%r' % e) else: logger.exception( 'Exception while receiving from %s connection %r with reason %r' % From b8a48f9b6ef4427908f179ae116ab507cbd1e33d Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 18:15:48 -0700 Subject: [PATCH 45/54] remove unused imports --- tests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests.py b/tests.py index 47d24e51bb..b430c95748 100644 --- a/tests.py +++ b/tests.py @@ -8,7 +8,6 @@ :license: BSD, see LICENSE for more details. 
""" import base64 -import errno import ipaddress import json import logging From 5f8dff5fd06819344e360d7cb34b997cf486360f Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 19:03:42 -0700 Subject: [PATCH 46/54] Within webserver parse pipelined requests only if we have a route --- proxy.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/proxy.py b/proxy.py index 83f5278b69..a832a47469 100755 --- a/proxy.py +++ b/proxy.py @@ -149,6 +149,11 @@ def bytes_(s: Any, encoding: str = 'utf-8', errors: str = 'strict') -> Any: ('SWITCHING_PROTOCOLS', int), # 2xx ('OK', int), + # 3xx + ('MOVED_PERMANENTLY', int), + ('SEE_OTHER', int), + ('TEMPORARY_REDIRECT', int), + ('PERMANENT_REDIRECT', int), # 4xx ('BAD_REQUEST', int), ('UNAUTHORIZED', int), @@ -168,6 +173,7 @@ def bytes_(s: Any, encoding: str = 'utf-8', errors: str = 'strict') -> Any: httpStatusCodes = HttpStatusCodes( 100, 101, 200, + 301, 303, 307, 308, 400, 401, 403, 404, 407, 408, 418, 500, 501, 502, 504, 598, 599 ) @@ -2336,15 +2342,17 @@ def on_client_data(self, raw: bytes) -> Optional[bytes]: frame.reset() return None # If 1st valid request was completed and it's a HTTP/1.1 keep-alive + # And only if we have a route, parse pipeline requests elif self.request.state == httpParserStates.COMPLETE and \ - self.request.is_http_1_1_keep_alive(): + self.request.is_http_1_1_keep_alive() and \ + self.route is not None: if self.pipeline_request is None: self.pipeline_request = HttpParser(httpParserTypes.REQUEST_PARSER) self.pipeline_request.parse(raw) if self.pipeline_request.state == httpParserStates.COMPLETE: - assert self.route is not None self.route.handle_request(self.pipeline_request) if not self.pipeline_request.is_http_1_1_keep_alive(): + logger.error('Pipelined request is not keep-alive, will teardown request...') raise ProtocolException() self.pipeline_request = None return raw From 1a772fb2cda9c4f60ffea912583320a9eed8fe10 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 19:04:07 -0700 Subject: [PATCH 47/54] Add ShortLinkPlugin plugin --- plugin_examples.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/plugin_examples.py b/plugin_examples.py index 7ae2d73d31..f9da977958 100644 --- a/plugin_examples.py +++ b/plugin_examples.py @@ -14,6 +14,60 @@ from urllib import parse as urlparse import proxy +from proxy import HttpParser + + +class ShortLinkPlugin(proxy.HttpProxyBasePlugin): + """Add support for short links in your favorite browsers / applications. + + Example, enable ShortLinkPlugin and start browsing using short links + defined below in SHORT_LINKS dictionary: + + 1. g/ for google.com + 2. fb/ for facebook.com + 3. yt/ for youtube.com + 4. tw/ for twitter.com + 5. proxy/ for proxy.py internal web servers. + + Customize map below for your taste and need. 
+    """
+
+    SHORT_LINKS = {
+        b'g': b'google.com',
+        b'fb': b'facebook.com',
+        b'yt': b'youtube.com',
+        b'tw': b'twitter.com',
+        b'proxy': b'localhost:8899',
+    }
+
+    def before_upstream_connection(self, request: HttpParser) -> Optional[HttpParser]:
+        if request.host and proxy.DOT not in request.host:
+            return None
+        return request
+
+    def handle_client_request(self, request: HttpParser) -> Optional[HttpParser]:
+        if request.host and proxy.DOT not in request.host:
+            if request.host in self.SHORT_LINKS:
+                self.client.queue(proxy.build_http_response(
+                    proxy.httpStatusCodes.SEE_OTHER, reason=b'See Other',
+                    headers={
+                        b'Location': b'http://' + self.SHORT_LINKS[request.host],
+                        b'Content-Length': b'0',
+                        b'Connection': b'close',
+                    }
+                ))
+            else:
+                self.client.queue(proxy.build_http_response(
+                    proxy.httpStatusCodes.NOT_FOUND, reason=b'NOT FOUND',
+                ))
+            return None
+        return request
+
+    def handle_upstream_chunk(self, chunk: bytes) -> bytes:
+        return chunk
+
+    def on_upstream_connection_close(self) -> None:
+        pass

From 789896e20ae75920fa4347e94ec1d53fd2ab6b19 Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Tue, 15 Oct 2019 19:26:33 -0700
Subject: [PATCH 48/54] Add more shortlinks

---
 plugin_examples.py | 38 +++++++++++++++++++++-------------
 proxy.py           |  2 +-
 2 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/plugin_examples.py b/plugin_examples.py
index f9da977958..e87f571841 100644
--- a/plugin_examples.py
+++ b/plugin_examples.py
@@ -20,28 +20,36 @@ class ShortLinkPlugin(proxy.HttpProxyBasePlugin):
     """Add support for short links in your favorite browsers / applications.

-    Example, enable ShortLinkPlugin and start browsing using short links
-    defined below in SHORT_LINKS dictionary:
-
-    1. g/ for google.com
-    2. fb/ for facebook.com
-    3. yt/ for youtube.com
-    4. tw/ for twitter.com
-    5. proxy/ for proxy.py internal web servers.
-
+    Enable ShortLinkPlugin and speed up your daily browsing experience.
+
+    Example:
+    * f/ for facebook.com
+    * g/ for google.com
+    * t/ for twitter.com
+    * y/ for youtube.com
+    * proxy/ for proxy.py internal web servers.
     Customize map below for your taste and need.
+
+    Note that path translation is not done, i.e.
+    t/imoracle won't resolve to http://twitter.com/imoracle.
+    That is left as an exercise for you :P.
""" SHORT_LINKS = { + b'a': b'amazon.com', + b'i': b'instagram.com', + b'l': b'linkedin.com', + b'f': b'facebook.com', b'g': b'google.com', - b'fb': b'facebook.com', - b'yt': b'youtube.com', - b'tw': b'twitter.com', + b't': b'twitter.com', + b'w': b'web.whatsapp.com', + b'y': b'youtube.com', b'proxy': b'localhost:8899', } def before_upstream_connection(self, request: HttpParser) -> Optional[HttpParser]: if request.host and proxy.DOT not in request.host: + # Avoid connecting to upstream return None return request @@ -51,7 +59,7 @@ def handle_client_request(self, request: HttpParser) -> Optional[HttpParser]: self.client.queue(proxy.build_http_response( proxy.httpStatusCodes.SEE_OTHER, reason=b'See Other', headers={ - b'Location': b'http://' + self.SHORT_LINKS[request.host], + b'Location': b'http://' + self.SHORT_LINKS[request.host] + proxy.SLASH, b'Content-Length': b'0', b'Connection': b'close', } @@ -59,6 +67,10 @@ def handle_client_request(self, request: HttpParser) -> Optional[HttpParser]: else: self.client.queue(proxy.build_http_response( proxy.httpStatusCodes.NOT_FOUND, reason=b'NOT FOUND', + headers={ + b'Content-Length': b'0', + b'Connection': b'close', + } )) return None return request diff --git a/proxy.py b/proxy.py index a832a47469..d5a1d1d707 100755 --- a/proxy.py +++ b/proxy.py @@ -124,7 +124,7 @@ def bytes_(s: Any, encoding: str = 'utf-8', errors: str = 'strict') -> Any: version = bytes_(__version__) -CRLF, COLON, WHITESPACE, COMMA, DOT, HTTP_1_1 = b'\r\n', b':', b' ', b',', b'.', b'HTTP/1.1' +CRLF, COLON, WHITESPACE, COMMA, DOT, SLASH, HTTP_1_1 = b'\r\n', b':', b' ', b',', b'.', b'/', b'HTTP/1.1' PROXY_AGENT_HEADER_KEY = b'Proxy-agent' PROXY_AGENT_HEADER_VALUE = b'proxy.py v' + version PROXY_AGENT_HEADER = PROXY_AGENT_HEADER_KEY + \ From bb9aa278d7239f660b211c19c7468aef4990d1d2 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 19:48:04 -0700 Subject: [PATCH 49/54] Add ShortLinkPlugin to README.md --- README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/README.md b/README.md index 1ec7bcc910..8d78cc97ab 100644 --- a/README.md +++ b/README.md @@ -234,6 +234,35 @@ See [plugin_examples.py](https://github.com/abhinavsingh/proxy.py/blob/develop/p All the examples below also works with `https` traffic but require additional flags and certificate generation. See [TLS Interception](#tls-interception). +## ShortLinkPlugin + +Add support for short links in your favorite browsers / applications. + +Start `proxy.py` as: + +``` +$ proxy.py \ + --plugins plugin_examples.ShortLinkPlugin +``` + +Now you can speed up your daily browsing experience by visiting your +favorite website using single character domain names :). This works +across all browsers. + +Following short links are enabled by default: + +Short Link | Destination URL +:--------: | :---------------: +a/ | amazon.com +i/ | instagram.com +l/ | linkedin.com +f/ | facebook.com +g/ | google.com +t/ | twitter.com +w/ | web.whatsapp.com +y/ | youtube.com +proxy/ | localhost:8899 + ## ModifyPostDataPlugin Modifies POST request body before sending request to upstream server. 
From fe94d7288af63ae577f96c74d3a3a32239faf4f2 Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Tue, 15 Oct 2019 19:51:53 -0700
Subject: [PATCH 50/54] Add path forwarding too instead of leaving it as an
 exercise ;)

---
 plugin_examples.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/plugin_examples.py b/plugin_examples.py
index e87f571841..051c611176 100644
--- a/plugin_examples.py
+++ b/plugin_examples.py
@@ -30,9 +30,8 @@ class ShortLinkPlugin(proxy.HttpProxyBasePlugin):
     * proxy/ for proxy.py internal web servers.
     Customize map below for your taste and need.

-    Note that path translation is not done, i.e.
-    t/imoracle won't resolve to http://twitter.com/imoracle.
-    That is left as an exercise for you :P.
+    Paths are also preserved. E.g. t/imoracle will
+    resolve to http://twitter.com/imoracle.
     """

     SHORT_LINKS = {
@@ -59,7 +58,7 @@ def handle_client_request(self, request: HttpParser) -> Optional[HttpParser]:
             self.client.queue(proxy.build_http_response(
                 proxy.httpStatusCodes.SEE_OTHER, reason=b'See Other',
                 headers={
-                    b'Location': b'http://' + self.SHORT_LINKS[request.host] + proxy.SLASH,
+                    b'Location': b'http://' + self.SHORT_LINKS[request.host] + request.path,
                     b'Content-Length': b'0',
                     b'Connection': b'close',
                 }

From 0ac69b3205875d62ae53e5182a6075de7a901c7a Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Tue, 15 Oct 2019 21:34:07 -0700
Subject: [PATCH 51/54] Add shortlink to TOC

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 8d78cc97ab..3c23e58647 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,7 @@ Table of Contents
     * [Stable version](#stable-version-from-docker-hub)
     * [Development version](#build-development-version-locally)
 * [Plugin Examples](#plugin-examples)
+    * [ShortLinkPlugin](#shortlinkplugin)
     * [ModifyPostDataPlugin](#modifypostdataplugin)
     * [ProposedRestApiPlugin](#proposedrestapiplugin)
     * [RedirectToCustomServerPlugin](#redirecttocustomserverplugin)

From d4fe97f38b62846a0c72b4d44602e1ee901089a3 Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Tue, 15 Oct 2019 21:46:09 -0700
Subject: [PATCH 52/54] Ensure no socket leaks

---
 proxy.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/proxy.py b/proxy.py
index d5a1d1d707..4324505605 100755
--- a/proxy.py
+++ b/proxy.py
@@ -1101,11 +1101,13 @@ def run_once(self) -> None:
     def run(self) -> None:
         self.running = True
         self.selector = selectors.DefaultSelector()
+        fileno = recv_handle(self.work_queue)
         self.sock = socket.fromfd(
-            recv_handle(self.work_queue),
+            fileno,
             family=self.family,
             type=socket.SOCK_STREAM
         )
+        os.close(fileno)
         try:
             self.selector.register(self.sock, selectors.EVENT_READ)
             self.start_threadless_process()
@@ -2507,9 +2509,11 @@ def shutdown(self) -> None:
         logger.debug('Client connection closed')

     def fromfd(self, fileno: int) -> socket.socket:
-        return socket.fromfd(
+        conn = socket.fromfd(
             fileno, family=socket.AF_INET if self.config.hostname.version == 4 else socket.AF_INET6,
             type=socket.SOCK_STREAM)
+        os.close(fileno)
+        return conn

     def optionally_wrap_socket(
             self, conn: socket.socket) -> Union[ssl.SSLSocket, socket.socket]:

From 0d3040fd4d0ec4bd3a33bea2b0c97bf4776aa1ac Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Tue, 15 Oct 2019 22:04:49 -0700
Subject: [PATCH 53/54] Ensure no leaks

---
 plugin_examples.py |  3 +-
 tests.py           | 90 ++++++++++++++++++++++++++++++++++++----------
 2 files changed, 74 insertions(+), 19 deletions(-)

diff --git a/plugin_examples.py b/plugin_examples.py
index 051c611176..cdd1f66946 100644
--- a/plugin_examples.py +++ b/plugin_examples.py @@ -55,10 +55,11 @@ def before_upstream_connection(self, request: HttpParser) -> Optional[HttpParser def handle_client_request(self, request: HttpParser) -> Optional[HttpParser]: if request.host and proxy.DOT not in request.host: if request.host in self.SHORT_LINKS: + path = proxy.SLASH if not request.path else request.path self.client.queue(proxy.build_http_response( proxy.httpStatusCodes.SEE_OTHER, reason=b'See Other', headers={ - b'Location': b'http://' + self.SHORT_LINKS[request.host] + request.path, + b'Location': b'http://' + self.SHORT_LINKS[request.host] + path, b'Content-Length': b'0', b'Connection': b'close', } diff --git a/tests.py b/tests.py index b430c95748..dddcd2a2f9 100644 --- a/tests.py +++ b/tests.py @@ -294,6 +294,7 @@ def setUp( mock_protocol_handler, config=self.protocol_config) + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') @mock.patch('proxy.recv_handle') @@ -301,7 +302,8 @@ def test_continues_when_no_events( self, mock_recv_handle: mock.Mock, mock_fromfd: mock.Mock, - mock_selector: mock.Mock) -> None: + mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: fileno = 10 conn = mock.MagicMock() addr = mock.MagicMock() @@ -317,6 +319,7 @@ def test_continues_when_no_events( sock.accept.assert_not_called() self.mock_protocol_handler.assert_not_called() + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') @mock.patch('proxy.recv_handle') @@ -324,7 +327,8 @@ def test_worker_doesnt_teardown_on_blocking_io_error( self, mock_recv_handle: mock.Mock, mock_fromfd: mock.Mock, - mock_selector: mock.Mock) -> None: + mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: fileno = 10 conn = mock.MagicMock() addr = mock.MagicMock() @@ -340,6 +344,7 @@ def test_worker_doesnt_teardown_on_blocking_io_error( self.mock_protocol_handler.assert_not_called() + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') @mock.patch('proxy.recv_handle') @@ -347,7 +352,8 @@ def test_accepts_client_from_server_socket( self, mock_recv_handle: mock.Mock, mock_fromfd: mock.Mock, - mock_selector: mock.Mock) -> None: + mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: fileno = 10 conn = mock.MagicMock() addr = mock.MagicMock() @@ -967,9 +973,13 @@ def test_handshake(self, mock_connect: mock.Mock, mock_b64encode: mock.Mock) -> class TestHttpProtocolHandler(unittest.TestCase): + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') - def setUp(self, mock_fromfd: mock.Mock, mock_selector: mock.Mock) -> None: + def setUp(self, + mock_fromfd: mock.Mock, + mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: self.fileno = 10 self._addr = ('127.0.0.1', 54382) self._conn = mock_fromfd.return_value @@ -982,6 +992,7 @@ def setUp(self, mock_fromfd: mock.Mock, mock_selector: mock.Mock) -> None: self.mock_selector = mock_selector self.proxy = proxy.ProtocolHandler( self.fileno, self._addr, config=self.config) + mock_os_close.assert_called_with(self.fileno) self.proxy.initialize() @mock.patch('proxy.TcpServerConnection') @@ -1095,10 +1106,14 @@ def test_proxy_connection_failed(self) -> None: self.proxy.run_once() self.assertEqual(self.proxy.client.buffer, proxy.ProxyConnectionFailed.RESPONSE_PKT) + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') def test_proxy_authentication_failed( - self, mock_fromfd: mock.Mock, 
mock_selector: mock.Mock) -> None: + self, + mock_fromfd: mock.Mock, + mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: self._conn = mock_fromfd.return_value self.mock_selector_for_client_read(mock_selector) config = proxy.ProtocolConfig( @@ -1108,6 +1123,7 @@ def test_proxy_authentication_failed( b'proxy.HttpProxyPlugin,proxy.HttpWebServerPlugin') self.proxy = proxy.ProtocolHandler( self.fileno, self._addr, config=config) + mock_os_close.assert_called_with(self.fileno) self.proxy.initialize() self._conn.recv.return_value = proxy.CRLF.join([ b'GET http://abhinavsingh.com HTTP/1.1', @@ -1119,13 +1135,15 @@ def test_proxy_authentication_failed( self.proxy.client.buffer, proxy.ProxyAuthenticationFailed.RESPONSE_PKT) + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') @mock.patch('proxy.TcpServerConnection') def test_authenticated_proxy_http_get( self, mock_server_connection: mock.Mock, mock_fromfd: mock.Mock, - mock_selector: mock.Mock) -> None: + mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: self._conn = mock_fromfd.return_value self.mock_selector_for_client_read(mock_selector) @@ -1141,6 +1159,7 @@ def test_authenticated_proxy_http_get( self.proxy = proxy.ProtocolHandler( self.fileno, addr=self._addr, config=config) + mock_os_close.assert_called_with(self.fileno) self.proxy.initialize() assert self.http_server_port is not None @@ -1167,13 +1186,15 @@ def test_authenticated_proxy_http_get( ]) self.assert_data_queued(mock_server_connection, server) + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') @mock.patch('proxy.TcpServerConnection') def test_authenticated_proxy_http_tunnel( self, mock_server_connection: mock.Mock, mock_fromfd: mock.Mock, - mock_selector: mock.Mock) -> None: + mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: server = mock_server_connection.return_value server.connect.return_value = True server.buffer_size.return_value = 0 @@ -1188,6 +1209,7 @@ def test_authenticated_proxy_http_tunnel( self.proxy = proxy.ProtocolHandler( self.fileno, self._addr, config=config) + mock_os_close.assert_called_with(self.fileno) self.proxy.initialize() assert self.http_server_port is not None @@ -1281,9 +1303,10 @@ def mock_selector_for_client_read(self, mock_selector: mock.Mock) -> None: class TestWebServerPlugin(unittest.TestCase): + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') - def setUp(self, mock_fromfd: mock.Mock, mock_selector: mock.Mock) -> None: + def setUp(self, mock_fromfd: mock.Mock, mock_selector: mock.Mock, mock_os_close: mock.Mock) -> None: self.fileno = 10 self._addr = ('127.0.0.1', 54382) self._conn = mock_fromfd.return_value @@ -1293,16 +1316,20 @@ def setUp(self, mock_fromfd: mock.Mock, mock_selector: mock.Mock) -> None: b'proxy.HttpProxyPlugin,proxy.HttpWebServerPlugin') self.proxy = proxy.ProtocolHandler( self.fileno, self._addr, config=self.config) + mock_os_close.assert_called_with(self.fileno) self.proxy.initialize() + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') def test_pac_file_served_from_disk( - self, mock_fromfd: mock.Mock, mock_selector: mock.Mock) -> None: + self, mock_fromfd: mock.Mock, mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: pac_file = 'proxy.pac' self._conn = mock_fromfd.return_value self.mock_selector_for_client_read(mock_selector) self.init_and_make_pac_file_request(pac_file) + 
mock_os_close.assert_called_with(self.fileno) self.proxy.run_once() self.assertEqual( self.proxy.request.state, @@ -1315,14 +1342,17 @@ def test_pac_file_served_from_disk( }, body=f.read() )) + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') def test_pac_file_served_from_buffer( - self, mock_fromfd: mock.Mock, mock_selector: mock.Mock) -> None: + self, mock_fromfd: mock.Mock, mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: self._conn = mock_fromfd.return_value self.mock_selector_for_client_read(mock_selector) pac_file_content = b'function FindProxyForURL(url, host) { return "PROXY localhost:8899; DIRECT"; }' self.init_and_make_pac_file_request(proxy.text_(pac_file_content)) + mock_os_close.assert_called_with(self.fileno) self.proxy.run_once() self.assertEqual( self.proxy.request.state, @@ -1334,10 +1364,12 @@ def test_pac_file_served_from_buffer( }, body=pac_file_content )) + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') def test_default_web_server_returns_404( - self, mock_fromfd: mock.Mock, mock_selector: mock.Mock) -> None: + self, mock_fromfd: mock.Mock, mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: self._conn = mock_fromfd.return_value mock_selector.return_value.select.return_value = [( selectors.SelectorKey( @@ -1350,6 +1382,7 @@ def test_default_web_server_returns_404( b'proxy.HttpProxyPlugin,proxy.HttpWebServerPlugin') self.proxy = proxy.ProtocolHandler( self.fileno, self._addr, config=config) + mock_os_close.assert_called_with(self.fileno) self.proxy.initialize() self._conn.recv.return_value = proxy.CRLF.join([ b'GET /hello HTTP/1.1', @@ -1363,10 +1396,12 @@ def test_default_web_server_returns_404( self.proxy.client.buffer, proxy.HttpWebServerPlugin.DEFAULT_404_RESPONSE) + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') def test_static_web_server_serves( - self, mock_fromfd: mock.Mock, mock_selector: mock.Mock) -> None: + self, mock_fromfd: mock.Mock, mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: # Setup a static directory static_server_dir = os.path.join(tempfile.gettempdir(), 'static') index_file_path = os.path.join(static_server_dir, 'index.html') @@ -1418,10 +1453,14 @@ def test_static_web_server_serves( body=html_file_content )) + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') def test_static_web_server_serves_404( - self, mock_fromfd: mock.Mock, mock_selector: mock.Mock) -> None: + self, + mock_fromfd: mock.Mock, + mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: self._conn = mock_fromfd.return_value self._conn.recv.return_value = proxy.build_http_request(b'GET', b'/not-found.html') @@ -1443,6 +1482,7 @@ def test_static_web_server_serves_404( self.proxy = proxy.ProtocolHandler( self.fileno, self._addr, config=config) + mock_os_close.assert_called_with(self.fileno) self.proxy.initialize() self.proxy.run_once() @@ -1453,15 +1493,17 @@ def test_static_web_server_serves_404( self.assertEqual(self._conn.send.call_args[0][0], proxy.HttpWebServerPlugin.DEFAULT_404_RESPONSE) + @mock.patch('os.close') @mock.patch('socket.fromfd') def test_on_client_connection_called_on_teardown( - self, mock_fromfd: mock.Mock) -> None: + self, mock_fromfd: mock.Mock, mock_os_close: mock.Mock) -> None: config = proxy.ProtocolConfig() plugin = mock.MagicMock() config.plugins = {b'ProtocolHandlerPlugin': [plugin]} self._conn = mock_fromfd.return_value 
self.proxy = proxy.ProtocolHandler( self.fileno, self._addr, config=config) + mock_os_close.assert_called_with(self.fileno) self.proxy.initialize() plugin.assert_called() with mock.patch.object(self.proxy, 'run_once') as mock_run_once: @@ -1493,11 +1535,13 @@ def mock_selector_for_client_read(self, mock_selector: mock.Mock) -> None: class TestHttpProxyPlugin(unittest.TestCase): + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') def setUp(self, mock_fromfd: mock.Mock, - mock_selector: mock.Mock) -> None: + mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: self.mock_fromfd = mock_fromfd self.mock_selector = mock_selector @@ -1512,6 +1556,7 @@ def setUp(self, self._conn = mock_fromfd.return_value self.proxy = proxy.ProtocolHandler( self.fileno, self._addr, config=self.config) + mock_os_close.assert_called_with(self.fileno) self.proxy.initialize() def test_proxy_plugin_initialized(self) -> None: @@ -1566,11 +1611,13 @@ def test_proxy_plugin_before_upstream_connection_can_teardown( class TestHttpProxyPluginExamples(unittest.TestCase): + @mock.patch('os.close') @mock.patch('selectors.DefaultSelector') @mock.patch('socket.fromfd') def setUp(self, mock_fromfd: mock.Mock, - mock_selector: mock.Mock) -> None: + mock_selector: mock.Mock, + mock_os_close: mock.Mock) -> None: self.fileno = 10 self._addr = ('127.0.0.1', 54382) self.config = proxy.ProtocolConfig() @@ -1588,6 +1635,7 @@ def setUp(self, self._conn = mock_fromfd.return_value self.proxy = proxy.ProtocolHandler( self.fileno, self._addr, config=self.config) + mock_os_close.assert_called_with(self.fileno) self.proxy.initialize() @mock.patch('proxy.TcpServerConnection') @@ -1788,6 +1836,7 @@ def closed() -> bool: class TestHttpProxyTlsInterception(unittest.TestCase): + @mock.patch('os.close') @mock.patch('ssl.wrap_socket') @mock.patch('ssl.create_default_context') @mock.patch('proxy.TcpServerConnection') @@ -1801,7 +1850,8 @@ def test_e2e( mock_popen: mock.Mock, mock_server_conn: mock.Mock, mock_ssl_context: mock.Mock, - mock_ssl_wrap: mock.Mock) -> None: + mock_ssl_wrap: mock.Mock, + mock_os_close: mock.Mock) -> None: host, port = uuid.uuid4().hex, 443 netloc = '{0}:{1}'.format(host, port) @@ -1841,6 +1891,7 @@ def mock_connection() -> Any: self._conn = mock_fromfd.return_value self.proxy = proxy.ProtocolHandler( self.fileno, self._addr, config=self.config) + mock_os_close.assert_called_with(self.fileno) self.proxy.initialize() self.plugin.assert_called() @@ -1919,6 +1970,7 @@ def mock_connection() -> Any: class TestHttpProxyPluginExamplesWithTlsInterception(unittest.TestCase): + @mock.patch('os.close') @mock.patch('ssl.wrap_socket') @mock.patch('ssl.create_default_context') @mock.patch('proxy.TcpServerConnection') @@ -1931,7 +1983,8 @@ def setUp(self, mock_popen: mock.Mock, mock_server_conn: mock.Mock, mock_ssl_context: mock.Mock, - mock_ssl_wrap: mock.Mock) -> None: + mock_ssl_wrap: mock.Mock, + mock_os_close: mock.Mock) -> None: self.mock_fromfd = mock_fromfd self.mock_selector = mock_selector self.mock_popen = mock_popen @@ -1957,6 +2010,7 @@ def setUp(self, mock_fromfd.return_value = self._conn self.proxy = proxy.ProtocolHandler( self.fileno, self._addr, config=self.config) + mock_os_close.assert_called_with(self.fileno) self.proxy.initialize() self.server = self.mock_server_conn.return_value From efb8dfe216338a77f69d1a69b9d2560154a518a8 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 15 Oct 2019 23:09:15 -0700 Subject: [PATCH 54/54] Naming --- start_chrome_with_proxy.sh => 
chrome_with_proxy.sh | 0 monitor_proxy_open_files.sh => monitor_open_files.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename start_chrome_with_proxy.sh => chrome_with_proxy.sh (100%) rename monitor_proxy_open_files.sh => monitor_open_files.sh (100%) diff --git a/start_chrome_with_proxy.sh b/chrome_with_proxy.sh similarity index 100% rename from start_chrome_with_proxy.sh rename to chrome_with_proxy.sh diff --git a/monitor_proxy_open_files.sh b/monitor_open_files.sh similarity index 100% rename from monitor_proxy_open_files.sh rename to monitor_open_files.sh
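A closing note on the descriptor handling introduced by PATCH 52 and 53: `socket.fromfd()` duplicates the descriptor passed to it, so the original fd (received via `recv_handle()` or handed to `ProtocolHandler.fromfd()`) must itself be closed explicitly, otherwise every accepted connection leaks one descriptor. Below is a minimal sketch of that pattern; the helper name is illustrative and does not exist in proxy.py, which inlines this logic instead:

```
import os
import socket


def socket_from_received_fd(fileno: int, family: socket.AddressFamily) -> socket.socket:
    """fromfd() dups the descriptor under the hood, so close the original
    or every accepted connection leaks one fd."""
    conn = socket.fromfd(fileno, family=family, type=socket.SOCK_STREAM)
    os.close(fileno)
    return conn


if __name__ == '__main__':
    # Self-check: the duplicated socket outlives the original descriptor.
    a, b = socket.socketpair()
    conn = socket_from_received_fd(os.dup(a.fileno()), socket.AF_UNIX)
    a.close()
    conn.send(b'ping')
    assert b.recv(4) == b'ping'
    conn.close()
    b.close()
```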