Commit 322fd51

Move server I/O to async event loop, reserve thread pool for app code
1 parent 8dc3a4e commit 322fd51

File tree

8 files changed: +841 -179 lines


plain/plain/runtime/global_settings.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -191,6 +191,7 @@
 ]
 SERVER_GRACEFUL_TIMEOUT: int = 30
 SERVER_SENDFILE: bool = True
+SERVER_CONNECTIONS: int = 1000

 # MARK: Preflight Checks
```
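A cap like `SERVER_CONNECTIONS` is commonly enforced with an `asyncio.Semaphore` gating new connections. The sketch below demonstrates only the generic pattern; Plain's actual enforcement is not shown in this diff, and every name here is illustrative:

```python
import asyncio

# Generic sketch of capping concurrent connections with a semaphore. This
# is a common pattern for a setting like SERVER_CONNECTIONS; it is NOT
# Plain's actual implementation, which this diff does not show.

async def serve_one(limiter: asyncio.Semaphore, active: list[int], peak: list[int]) -> None:
    async with limiter:            # the (cap + 1)th concurrent connection waits here
        active[0] += 1
        peak[0] = max(peak[0], active[0])
        await asyncio.sleep(0)     # stand-in for actually handling the connection
        active[0] -= 1

async def main(n_connections: int = 10, cap: int = 3) -> int:
    limiter = asyncio.Semaphore(cap)
    active, peak = [0], [0]
    await asyncio.gather(
        *(serve_one(limiter, active, peak) for _ in range(n_connections))
    )
    return peak[0]

print(asyncio.run(main()))  # 3 -- concurrency never exceeds the cap
```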

plain/plain/server/README.md

Lines changed: 19 additions & 4 deletions
```diff
@@ -32,13 +32,13 @@ plain server --reload
 The server uses two levels of concurrency:

 - **Workers** are separate OS processes. Each worker runs independently with its own memory. The default is `0` (auto), which spawns one worker per CPU core.
-- **Threads** run inside each worker. Threads share memory within a worker and handle concurrent requests using a thread pool. The default is 4 threads per worker.
+- **Threads** run inside each worker. Threads handle application code (middleware and views) using a thread pool. All network I/O (accepting connections, reading requests, writing responses, TLS, keepalive) is handled asynchronously on the event loop without consuming threads. The default is 4 threads per worker.

 Total concurrent requests = `workers × threads`. On a 4-core machine with the defaults, that's `4 × 4 = 16` concurrent requests.

 **When to adjust workers:** Workers provide true parallelism since each is a separate process with its own Python GIL. More workers means more memory usage but better CPU utilization. Use `--workers 0` (the default) to match your CPU cores, or set an explicit number.

-**When to adjust threads:** Threads are efficient for I/O-bound work (database queries, external API calls) since they release the GIL while waiting. Most web applications are I/O-bound, so the default of 4 threads works well. Increase threads if your application spends a lot of time waiting on I/O. Decrease to 1 if you need to avoid thread-safety concerns.
+**When to adjust threads:** Threads are used exclusively for running your application code (middleware and views). This means `SERVER_THREADS` directly controls how many views can execute in parallel — it's not shared with I/O operations. Increase threads if your views spend a lot of time waiting on I/O (database queries, external API calls). Decrease to 1 if you need to avoid thread-safety concerns.

 **Long-lived connections:** Async views (SSE, WebSocket) run on the worker's event loop instead of occupying a thread pool slot. This means long-lived connections don't reduce your capacity for regular requests.

```
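The capacity arithmetic in the README (`workers × threads`) is simple enough to state as code; `total_concurrent_requests` is an illustrative name, not a Plain API:

```python
def total_concurrent_requests(workers: int, threads: int) -> int:
    # Each worker process runs its own thread pool; capacity multiplies.
    return workers * threads

# 4-core machine with the defaults: --workers 0 resolves to one worker
# per core, and SERVER_THREADS defaults to 4.
print(total_concurrent_requests(workers=4, threads=4))  # 16
```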
````diff
@@ -82,6 +82,7 @@ SERVER_ACCESS_LOG = True
 SERVER_ACCESS_LOG_FIELDS = ["method", "path", "query", "status", "duration_ms", "size", "ip", "user_agent", "referer"]
 SERVER_GRACEFUL_TIMEOUT = 30
 SERVER_SENDFILE = True
+SERVER_CONNECTIONS = 1000
 ```

 Settings can also be set via environment variables with the `PLAIN_` prefix (e.g., `PLAIN_SERVER_WORKERS=4`).
````
````diff
@@ -193,21 +194,35 @@ plain server --timeout 120

 ## Architecture

+Each worker process runs an asyncio event loop that handles all network I/O. A thread pool is reserved exclusively for application code.
+
 ```mermaid
 graph TD
     A[Arbiter] -->|fork per core| W[Worker]
     W --> EL[asyncio event loop]
     EL -->|accept| C[Connection]
     C -->|wait readable| EL
+    C -->|TLS handshake| TP_TLS[Thread pool]
+    TP_TLS --> EL
     C -->|TLS ALPN| P{Protocol?}
     P -->|h2| H2[HTTP/2 handler]
-    P -->|http/1.1| H1[Parse in thread pool]
+    P -->|http/1.1| HDR[Read headers async]
+    HDR --> BODY{Body size?}
+    BODY -->|"small (≤ limit)"| PRE[Pre-buffer body async]
+    BODY -->|"large (> limit)"| BRIDGE[AsyncBridgeUnreader]
+    PRE --> PARSE[Parse request]
+    BRIDGE -->|"parse in thread pool"| PARSE
     H2 -->|"h2 codec (sans-I/O)"| STREAMS[Multiplexed streams]
     STREAMS -->|per stream| TP[Thread pool]
-    H1 --> TP
+    PARSE --> TP
     TP --> MW[before_request + view + after_response]
+    MW -->|write response async| EL
 ```

+**Request body handling:** Small request bodies (≤ `DATA_UPLOAD_MAX_MEMORY_SIZE`, default 2.5MB) are pre-buffered on the event loop before parsing. Large bodies use `AsyncBridgeUnreader` which streams data lazily from the socket — the parser runs in the thread pool and bridges back to the event loop for socket reads. This keeps memory bounded while supporting large file uploads through multipart streaming to temp files.
+
+**Async views note:** Async views that read the request body work with pre-buffered (small) requests. For large bodies on the bridge path, body reads must happen in the thread pool (sync views). If you need async views to handle large uploads, increase `DATA_UPLOAD_MAX_MEMORY_SIZE` to cover your expected body sizes.
+
 ## Installation

 The server module is included with Plain. No additional installation is required.
````

plain/plain/server/arbiter.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -106,7 +106,9 @@ def _start(self) -> None:
             plain.runtime.__version__,
         )

-        check_worker_config(self.app.threads, self.log)
+        from plain.runtime import settings
+
+        check_worker_config(self.app.threads, settings.SERVER_CONNECTIONS, self.log)

     def _handle_signal(self, sig: int, frame: object) -> None:
         self._shutdown_event.set()
```
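The diff shows `check_worker_config` gaining a connections argument but not its body. A hypothetical sketch of a check it might perform, assuming only the signature visible at the call site; the warning condition is invented for illustration:

```python
import logging

def check_worker_config(threads: int, connections: int, log: logging.Logger) -> None:
    # Hypothetical body -- the real check_worker_config is not shown in
    # this diff. One sensible invariant: the connection cap should be at
    # least as large as the thread pool, or some threads can never be fed.
    if connections < threads:
        log.warning(
            "SERVER_CONNECTIONS (%d) is lower than the thread count (%d); "
            "some worker threads will never receive a request.",
            connections,
            threads,
        )

log = logging.getLogger("plain.server")
check_worker_config(threads=4, connections=1000, log=log)  # defaults: no warning
```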

plain/plain/server/http/response.py

Lines changed: 76 additions & 5 deletions
```diff
@@ -6,6 +6,7 @@
 # See the LICENSE for more information.
 #
 # Vendored and modified for Plain.
+import asyncio
 import io
 import logging
 import os
@@ -107,13 +108,10 @@ def create_request(
 ) -> HttpRequest:
     """Build a plain.http.Request directly from the server's parsed HTTP message."""

-    # Handle 100-continue before merging headers
+    # Extract Host header (100-continue is handled during async body reading)
     host = None
     for hdr_name, hdr_value in req.headers:
-        if hdr_name == "EXPECT":
-            if hdr_value.lower() == "100-continue":
-                sock.send(b"HTTP/1.1 100 Continue\r\n\r\n")
-        elif hdr_name == "HOST":
+        if hdr_name == "HOST":
             host = hdr_value

     headers = _merge_headers(req.headers)
@@ -420,3 +418,76 @@ def close(self) -> None:
             self.send_headers()
         if self.chunked:
             util.write_chunk(self.sock, b"")
+
+    # ------------------------------------------------------------------
+    # Async write methods — use loop.sock_sendall() instead of blocking
+    # sendall(). The socket must be non-blocking (managed by asyncio).
+    # ------------------------------------------------------------------
+
+    async def async_send_headers(self) -> None:
+        if self.headers_sent:
+            return
+        tosend = self.default_headers()
+        tosend.extend([f"{k}: {v}\r\n" for k, v in self.headers])
+        header_str = "{}\r\n".format("".join(tosend))
+        await util.async_sendall(self.sock, util.to_bytestring(header_str, "latin-1"))
+        self.headers_sent = True
+
+    async def async_write(self, arg: bytes) -> None:
+        await self.async_send_headers()
+        if not isinstance(arg, bytes):
+            raise TypeError(f"{arg!r} is not a byte")
+        arglen = len(arg)
+        tosend = arglen
+        if self.response_length is not None:
+            if self.sent >= self.response_length:
+                return
+            tosend = min(self.response_length - self.sent, tosend)
+            if tosend < arglen:
+                arg = arg[:tosend]
+
+        if self.chunked and tosend == 0:
+            return
+
+        self.sent += tosend
+        if self.chunked:
+            chunk_size = f"{len(arg):X}\r\n"
+            chunk = b"".join([chunk_size.encode("utf-8"), arg, b"\r\n"])
+            await util.async_sendall(self.sock, chunk)
+        else:
+            await util.async_sendall(self.sock, arg)
+
+    async def async_write_response(self, http_response: Any) -> None:
+        """Write a plain.http.ResponseBase using async I/O."""
+        self.prepare_response(http_response)
+
+        if (
+            isinstance(http_response, FileResponse)
+            and http_response.file_to_stream is not None
+        ):
+            file_wrapper = FileWrapper(
+                http_response.file_to_stream, http_response.block_size
+            )
+            http_response.file_to_stream.close = http_response.close
+            # Read file chunks in the default executor (not the app thread pool)
+            # to avoid blocking the event loop. File reads are fast and shouldn't
+            # contend with app threads.
+            loop = asyncio.get_running_loop()
+            while True:
+                chunk = await loop.run_in_executor(
+                    None, file_wrapper.filelike.read, file_wrapper.blksize
+                )
+                if not chunk:
+                    break
+                await self.async_write(chunk)
+        else:
+            for chunk in http_response:
+                await self.async_write(chunk)
+
+        await self.async_close()
+
+    async def async_close(self) -> None:
+        if not self.headers_sent:
+            await self.async_send_headers()
+        if self.chunked:
+            await util.async_sendall(self.sock, b"0\r\n\r\n")
```
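`util.async_sendall` itself is not part of this diff; the comment in the hunk says the async write methods use `loop.sock_sendall()` on a non-blocking socket. A minimal self-contained sketch of that pattern over a local socket pair (the wrapper shape is an assumption):

```python
import asyncio
import socket

async def async_sendall(sock: socket.socket, data: bytes) -> None:
    # Likely shape of util.async_sendall (assumption -- it is not in this
    # diff): the socket is non-blocking and sock_sendall keeps writing,
    # yielding to the event loop, until every byte is flushed.
    loop = asyncio.get_running_loop()
    await loop.sock_sendall(sock, data)

async def demo() -> bytes:
    # A local socket pair shows the call in isolation.
    a, b = socket.socketpair()
    a.setblocking(False)
    b.setblocking(False)
    await async_sendall(a, b"HTTP/1.1 200 OK\r\n\r\n")
    a.close()
    loop = asyncio.get_running_loop()
    received = await loop.sock_recv(b, 1024)
    b.close()
    return received

print(asyncio.run(demo()))  # b'HTTP/1.1 200 OK\r\n\r\n'
```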

plain/plain/server/http/unreader.py

Lines changed: 68 additions & 3 deletions
```diff
@@ -6,14 +6,13 @@
 # See the LICENSE for more information.
 #
 # Vendored and modified for Plain.
+import asyncio
 import io
 import os
 import socket
 from collections.abc import Iterable, Iterator
-from typing import TYPE_CHECKING

-if TYPE_CHECKING:
-    pass
+from .. import util

 # Classes that can undo reading data from
 # a given type of data source.
@@ -86,3 +85,69 @@ def chunk(self) -> bytes:
         except StopIteration:
             self.iter = None
             return b""
+
+
+class BufferUnreader(Unreader):
+    """Unreader backed by pre-read bytes with no socket I/O.
+
+    Used when headers and body have been read asynchronously on the
+    event loop and the data is already in memory. The parser reads
+    headers from the buffer and sets up body readers (ChunkedReader,
+    LengthReader) that also read from this buffer.
+    """
+
+    def __init__(self, data: bytes) -> None:
+        super().__init__()
+        self.buf.write(data)
+
+    def chunk(self) -> bytes:
+        # All data is pre-buffered; nothing more to read.
+        return b""
+
+
+class AsyncBridgeUnreader(Unreader):
+    """Unreader that bridges async socket reads to sync parser reads.
+
+    Used for large request bodies that shouldn't be fully pre-buffered.
+    Headers and any initial body bytes are in the buffer. When the buffer
+    is exhausted, chunk() bridges to the event loop via
+    run_coroutine_threadsafe for lazy socket reads.
+
+    IMPORTANT: chunk() blocks the calling thread, so this unreader must
+    only be used from a thread pool — never from the event loop thread.
+    """
+
+    def __init__(
+        self,
+        data: bytes,
+        sock: socket.socket,
+        loop: asyncio.AbstractEventLoop,
+        timeout: float = 30,
+    ) -> None:
+        super().__init__()
+        self.buf.write(data)
+        self._sock = sock
+        self._loop = loop
+        self._timeout = timeout
+        self._eof = False
+        self.socket_bytes_read = 0
+
+    def chunk(self) -> bytes:
+        if self._eof:
+            return b""
+        future = asyncio.run_coroutine_threadsafe(
+            util.async_recv(self._sock, 8192), self._loop
+        )
+        try:
+            # On Python 3.11+, concurrent.futures.TimeoutError is
+            # builtins.TimeoutError so this except clause catches it.
+            data = future.result(timeout=self._timeout)
+        except TimeoutError:
+            future.cancel()
+            self._eof = True
+            raise
+        if not data:
+            self._eof = True
+        else:
+            self.socket_bytes_read += len(data)
+        return data
```
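`AsyncBridgeUnreader.chunk()` leans on `asyncio.run_coroutine_threadsafe`, which schedules a coroutine onto a loop running in another thread and hands back a `concurrent.futures.Future` the calling thread can block on. A self-contained demonstration of that bridge, with an `asyncio.Queue` standing in for the socket read (names and data are illustrative):

```python
import asyncio
import threading

async def read_chunk(queue: asyncio.Queue) -> bytes:
    # Stands in for util.async_recv(sock, 8192): an awaitable read that
    # must execute on the event loop thread.
    return await queue.get()

def main() -> list[bytes]:
    # Run an event loop in a background thread, like the worker does.
    loop = asyncio.new_event_loop()
    t = threading.Thread(target=loop.run_forever, daemon=True)
    t.start()

    queue: asyncio.Queue = asyncio.Queue()
    for item in (b"hello ", b"world", b""):  # two chunks, then EOF
        loop.call_soon_threadsafe(queue.put_nowait, item)

    chunks = []
    while True:
        # The bridge: block this (worker) thread on a coroutine scheduled
        # onto the loop thread, exactly as AsyncBridgeUnreader.chunk() does.
        future = asyncio.run_coroutine_threadsafe(read_chunk(queue), loop)
        data = future.result(timeout=5)
        if not data:
            break
        chunks.append(data)

    loop.call_soon_threadsafe(loop.stop)
    t.join()
    loop.close()
    return chunks

print(main())  # [b'hello ', b'world']
```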
