From ba74f09efbf5128a105d43ebb4fa21d2f3766381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Mon, 16 Dec 2024 02:09:37 +0100 Subject: [PATCH 1/4] Byte readers, streaming Filesystem, rewrite ZipStreamReader --- composer.json | 1 - .../Runner/Step/UnzipStepRunner.php | 3 +- src/WordPress/ByteReader/WP_Byte_Reader.php | 16 + src/WordPress/ByteReader/WP_File_Reader.php | 109 ++++ .../ByteReader/WP_GZ_File_Reader.php | 28 ++ .../WP_Remote_File_Ranged_Reader.php | 194 ++++++++ .../ByteReader/WP_Remote_File_Reader.php | 115 +++++ .../Filesystem/WP_Abstract_Filesystem.php | 101 ++++ .../Filesystem/WP_File_Visitor_Event.php | 32 ++ src/WordPress/Filesystem/WP_Filesystem.php | 74 +++ .../Filesystem/WP_Filesystem_Visitor.php | 91 ++++ .../Filesystem/WP_Zip_Filesystem.php | 270 ++++++++++ src/WordPress/Zip/NewZipStreamReader.php | 357 ------------- src/WordPress/Zip/WP_Zip_Filesystem.php | 271 ++++++++++ src/WordPress/Zip/ZipStreamReader.php | 467 ++++++++++++------ src/WordPress/Zip/functions.php | 36 -- 16 files changed, 1612 insertions(+), 553 deletions(-) create mode 100644 src/WordPress/ByteReader/WP_Byte_Reader.php create mode 100644 src/WordPress/ByteReader/WP_File_Reader.php create mode 100644 src/WordPress/ByteReader/WP_GZ_File_Reader.php create mode 100644 src/WordPress/ByteReader/WP_Remote_File_Ranged_Reader.php create mode 100644 src/WordPress/ByteReader/WP_Remote_File_Reader.php create mode 100644 src/WordPress/Filesystem/WP_Abstract_Filesystem.php create mode 100644 src/WordPress/Filesystem/WP_File_Visitor_Event.php create mode 100644 src/WordPress/Filesystem/WP_Filesystem.php create mode 100644 src/WordPress/Filesystem/WP_Filesystem_Visitor.php create mode 100644 src/WordPress/Filesystem/WP_Zip_Filesystem.php delete mode 100644 src/WordPress/Zip/NewZipStreamReader.php create mode 100644 src/WordPress/Zip/WP_Zip_Filesystem.php delete mode 100644 src/WordPress/Zip/functions.php diff --git a/composer.json b/composer.json index 
487361b7..5c9aeb01 100644 --- a/composer.json +++ b/composer.json @@ -29,7 +29,6 @@ }, "files": [ "src/WordPress/Blueprints/functions.php", - "src/WordPress/Zip/functions.php", "src/WordPress/Streams/stream_str_replace.php" ] }, diff --git a/src/WordPress/Blueprints/Runner/Step/UnzipStepRunner.php b/src/WordPress/Blueprints/Runner/Step/UnzipStepRunner.php index f3d7db64..3ccc4d93 100644 --- a/src/WordPress/Blueprints/Runner/Step/UnzipStepRunner.php +++ b/src/WordPress/Blueprints/Runner/Step/UnzipStepRunner.php @@ -4,7 +4,6 @@ use WordPress\Blueprints\Model\DataClass\UnzipStep; use WordPress\Blueprints\Progress\Tracker; -use function WordPress\Zip\zip_extract_to; class UnzipStepRunner extends BaseStepRunner { @@ -22,6 +21,6 @@ public function run( $progress_tracker->set( 10, 'Unzipping...' ); $resolved_to_path = $this->getRuntime()->resolvePath( $input->extractToPath ); - zip_extract_to( $this->getResource( $input->zipFile ), $resolved_to_path ); + throw new \Exception("Not implemented at the moment. Needs to be updated to use the new ZipStreamReader API."); } } diff --git a/src/WordPress/ByteReader/WP_Byte_Reader.php b/src/WordPress/ByteReader/WP_Byte_Reader.php new file mode 100644 index 00000000..77fdeeb6 --- /dev/null +++ b/src/WordPress/ByteReader/WP_Byte_Reader.php @@ -0,0 +1,16 @@ +file_path = $file_path; + $this->chunk_size = $chunk_size; + } + + public function length(): int { + return filesize( $this->file_path ); + } + + public function tell(): int { + // Save the previous offset, not the current one. + // This way, after resuming, the next read will yield the same $output_bytes + // as we have now. + return $this->offset_in_file - $this->last_chunk_size; + } + + public function seek( $offset_in_file ): bool { + if ( ! 
is_int( $offset_in_file ) ) { + _doing_it_wrong( __METHOD__, 'Cannot set a file reader cursor to a non-integer offset.', '1.0.0' ); + return false; + } + $this->offset_in_file = $offset_in_file; + $this->last_chunk_size = 0; + $this->output_bytes = ''; + if ( $this->file_pointer ) { + if ( false === fseek( $this->file_pointer, $this->offset_in_file ) ) { + return false; + } + } + return true; + } + + public function close() { + if(!$this->file_pointer) { + return false; + } + if(!fclose($this->file_pointer)) { + $this->last_error = 'Failed to close file pointer'; + return false; + } + $this->file_pointer = null; + $this->state = static::STATE_FINISHED; + return true; + } + + public function is_finished(): bool { + return ! $this->output_bytes && $this->state === static::STATE_FINISHED; + } + + public function get_bytes(): string { + return $this->output_bytes; + } + + public function get_last_error(): ?string { + return $this->last_error; + } + + public function next_bytes(): bool { + $this->output_bytes = ''; + $this->last_chunk_size = 0; + if ( $this->last_error || $this->is_finished() ) { + return false; + } + if ( ! $this->file_pointer ) { + $this->file_pointer = fopen( $this->file_path, 'r' ); + if ( $this->offset_in_file ) { + fseek( $this->file_pointer, $this->offset_in_file ); + } + } + $bytes = fread( $this->file_pointer, $this->chunk_size ); + if ( ! $bytes && feof( $this->file_pointer ) ) { + return false; + } + $this->last_chunk_size = strlen( $bytes ); + $this->offset_in_file += $this->last_chunk_size; + $this->output_bytes .= $bytes; + return true; + } +} diff --git a/src/WordPress/ByteReader/WP_GZ_File_Reader.php b/src/WordPress/ByteReader/WP_GZ_File_Reader.php new file mode 100644 index 00000000..1acb729e --- /dev/null +++ b/src/WordPress/ByteReader/WP_GZ_File_Reader.php @@ -0,0 +1,28 @@ +output_bytes = ''; + if ( $this->last_error || $this->is_finished() ) { + return false; + } + if ( ! 
$this->file_pointer ) { + $this->file_pointer = gzopen( $this->file_path, 'r' ); + if ( $this->offset_in_file ) { + gzseek( $this->file_pointer, $this->offset_in_file ); + } + } + $bytes = gzread( $this->file_pointer, $this->chunk_size ); + if ( ! $bytes && gzeof( $this->file_pointer ) ) { + gzclose( $this->file_pointer ); + $this->state->finish(); + return false; + } + $this->offset_in_file += strlen( $bytes ); + $this->output_bytes .= $bytes; + return true; + } +} diff --git a/src/WordPress/ByteReader/WP_Remote_File_Ranged_Reader.php b/src/WordPress/ByteReader/WP_Remote_File_Ranged_Reader.php new file mode 100644 index 00000000..341370c7 --- /dev/null +++ b/src/WordPress/ByteReader/WP_Remote_File_Ranged_Reader.php @@ -0,0 +1,194 @@ +seek(0); + * $file->request_bytes(100); + * while($file->next_chunk()) { + * var_dump($file->get_bytes()); + * } + * $file->seek(600); + * $file->request_bytes(40); + * while($file->next_chunk()) { + * var_dump($file->get_bytes()); + * } + * + * @TODO: Verify that the remote server supports range requests. + * @TODO: Support requesting multiple ranges in a single request. + * @TODO: Abort in-progress requests when seeking to a new offset. + */ +class WP_Remote_File_Ranged_Reader { + + /** + * @var WordPress\AsyncHttp\Client + */ + private $client; + private $url; + private $remote_file_length; + + private $current_request; + private $offset_in_remote_file = 0; + private $offset_in_current_chunk = 0; + private $current_chunk; + private $expected_chunk_size; + + public function __construct( $url, $options = array() ) { + $this->client = new \WordPress\AsyncHttp\Client(); + $this->url = $url; + } + + public function length(): int { + throw new \Exception( 'Not implemented yet.' 
); + } + + public function request_bytes( $bytes ) { + if ( null === $this->remote_file_length ) { + $content_length = $this->resolve_content_length(); + if ( false === $content_length ) { + // The remote server won't tell us what the content length is + // @TODO: What should we do in this case? Content-length is critical for + // stream-decompressing remote zip files, but we may not need it + // for other use-cases. + return false; + } + $this->remote_file_length = $content_length; + } + + if ( $this->offset_in_remote_file < 0 || $this->offset_in_remote_file + $bytes > $this->remote_file_length ) { + // TODO: Think through error handling + return false; + } + + $this->seek( $this->offset_in_remote_file ); + + $this->current_request = new \WordPress\AsyncHttp\Request( + $this->url, + array( + 'headers' => array( + 'Range' => 'bytes=' . $this->offset_in_remote_file . '-' . ( $this->offset_in_remote_file + $bytes - 1 ), + ), + ) + ); + $this->expected_chunk_size = $bytes; + $this->offset_in_current_chunk = 0; + if ( false === $this->client->enqueue( $this->current_request ) ) { + // TODO: Think through error handling + return false; + } + return true; + } + + public function seek( $offset ) { + $this->offset_in_remote_file = $offset; + // @TODO cancel any pending requests + $this->current_request = null; + } + + public function tell() { + return $this->offset_in_remote_file; + } + + public function resolve_content_length() { + if ( null !== $this->remote_file_length ) { + return $this->remote_file_length; + } + + $request = new \WordPress\AsyncHttp\Request( + $this->url, + array( 'method' => 'HEAD' ) + ); + if ( false === $this->client->enqueue( $request ) ) { + // TODO: Think through error handling + return false; + } + while ( $this->client->await_next_event() ) { + switch ( $this->client->get_event() ) { + case \WordPress\AsyncHttp\Client::EVENT_GOT_HEADERS: + $response = $request->response; + if ( false === $response ) { + return false; + } + $content_length = 
$response->get_header( 'Content-Length' ); + if ( false === $content_length ) { + return false; + } + return (int) $content_length; + } + } + return false; + } + + public function next_chunk() { + while ( $this->client->await_next_event() ) { + /** + * Only process events related to the most recent request. + * @TODO: Support redirects. + * @TODO: Cleanup resources for stale requests. + */ + if ( $this->current_request->id !== $this->client->get_request()->id ) { + continue; + } + + if ( $this->offset_in_current_chunk >= $this->expected_chunk_size ) { + // The remote server doesn't support range requests and sent us a chunk larger than expected. + // @TODO: Handle this case. Should we stream the entire file, or give up? + // Should we cache the download locally, or request the entire file again every + // time we need to seek()? + return false; + } + + switch ( $this->client->get_event() ) { + case \WordPress\AsyncHttp\Client::EVENT_GOT_HEADERS: + $request = $this->client->get_request(); + if ( ! $request ) { + return false; + } + $response = $request->response; + if ( false === $response ) { + return false; + } + if ( + $response->status_code !== 206 || + false === $response->get_header( 'Range' ) + ) { + // The remote server doesn't support range requests + // @TODO: Handle this case. Should we stream the entire file, or give up? + // Should we cache the download locally, or request the entire file again every + // time we need to seek()? + return false; + } + break; + case \WordPress\AsyncHttp\Client::EVENT_BODY_CHUNK_AVAILABLE: + $chunk = $this->client->get_response_body_chunk(); + if ( ! is_string( $chunk ) ) { + // TODO: Think through error handling + return false; + } + $this->current_chunk = $chunk; + $this->offset_in_remote_file += strlen( $chunk ); + $this->offset_in_current_chunk += strlen( $chunk ); + + return true; + case \WordPress\AsyncHttp\Client::EVENT_FAILED: + // TODO: Think through error handling. 
Errors are expected when working with + // the network. Should we auto retry? Make it easy for the caller to retry? + // Something else? + return false; + case \WordPress\AsyncHttp\Client::EVENT_FINISHED: + // TODO: Think through error handling + return false; + } + } + } + + public function get_bytes() { + return $this->current_chunk; + } +} diff --git a/src/WordPress/ByteReader/WP_Remote_File_Reader.php b/src/WordPress/ByteReader/WP_Remote_File_Reader.php new file mode 100644 index 00000000..156ddec5 --- /dev/null +++ b/src/WordPress/ByteReader/WP_Remote_File_Reader.php @@ -0,0 +1,115 @@ +client = new \WordPress\AsyncHttp\Client(); + $this->url = $url; + } + + public function length(): int { + throw new \Exception( 'Not implemented yet.' ); + } + + public function tell(): int { + return $this->bytes_already_read + $this->skip_bytes; + } + + public function seek( $offset_in_file ): bool { + if ( $this->request ) { + _doing_it_wrong( __METHOD__, 'Cannot set a remote file reader cursor on a remote file reader that is already initialized.', '1.0.0' ); + return false; + } + $this->skip_bytes = $offset_in_file; + return true; + } + + public function next_bytes(): bool { + if ( null === $this->request ) { + $this->request = new \WordPress\AsyncHttp\Request( + $this->url + ); + if ( false === $this->client->enqueue( $this->request ) ) { + // TODO: Think through error handling + return false; + } + } + + $this->after_chunk(); + + while ( $this->client->await_next_event() ) { + switch ( $this->client->get_event() ) { + case \WordPress\AsyncHttp\Client::EVENT_BODY_CHUNK_AVAILABLE: + $chunk = $this->client->get_response_body_chunk(); + if ( ! is_string( $chunk ) ) { + // TODO: Think through error handling + return false; + } + $this->current_chunk = $chunk; + + /** + * Naive seek() implementation – redownload the file from the start + * and ignore bytes until we reach the desired offset. + * + * @TODO: Use the range requests instead when the server supports them. 
+ */ + if ( $this->skip_bytes > 0 ) { + if ( $this->skip_bytes < strlen( $chunk ) ) { + $this->current_chunk = substr( $chunk, $this->skip_bytes ); + $this->bytes_already_read += $this->skip_bytes; + $this->skip_bytes = 0; + } else { + $this->skip_bytes -= strlen( $chunk ); + continue 2; + } + } + return true; + case \WordPress\AsyncHttp\Client::EVENT_FAILED: + // TODO: Think through error handling. Errors are expected when working with + // the network. Should we auto retry? Make it easy for the caller to retry? + // Something else? + $this->last_error = $this->client->get_request()->error; + return false; + case \WordPress\AsyncHttp\Client::EVENT_FINISHED: + $this->is_finished = true; + return false; + } + } + } + + private function after_chunk() { + if ( $this->current_chunk ) { + $this->bytes_already_read += strlen( $this->current_chunk ); + } + $this->current_chunk = null; + } + + public function get_last_error(): ?string { + return $this->last_error; + } + + public function get_bytes(): ?string { + return $this->current_chunk; + } + + public function is_finished(): bool { + return $this->is_finished; + } +} diff --git a/src/WordPress/Filesystem/WP_Abstract_Filesystem.php b/src/WordPress/Filesystem/WP_Abstract_Filesystem.php new file mode 100644 index 00000000..81d1992d --- /dev/null +++ b/src/WordPress/Filesystem/WP_Abstract_Filesystem.php @@ -0,0 +1,101 @@ + The contents of the directory. + */ + abstract public function ls($parent = '/'); + + /** + * Check if a path is a directory. + * + * @param string $path The path to check. + * @return bool True if the path is a directory, false otherwise. + */ + abstract public function is_dir($path); + + /** + * Check if a path is a file. + * + * @param string $path The path to check. + * @return bool True if the path is a file, false otherwise. + */ + abstract public function is_file($path); + + /** + * Start streaming a file. 
+ * + * @example + * + * $fs->start_streaming_file($path); + * while($fs->next_file_chunk()) { + * $chunk = $fs->get_file_chunk(); + * // process $chunk + * } + * $fs->close_file_reader(); + * + * @param string $path The path to the file. + */ + abstract public function start_streaming_file($path); + + /** + * Get the next chunk of a file. + * + * @return string|false The next chunk of the file or false if the end of the file is reached. + */ + abstract public function next_file_chunk(); + + /** + * Get the current chunk of a file. + * + * @return string|false The current chunk of the file or false if no chunk is available. + */ + abstract public function get_file_chunk(); + + /** + * Get the error message of the filesystem. + * + * @return string|false The error message or false if no error occurred. + */ + abstract public function get_error_message(); + + /** + * Close the file reader. + */ + abstract public function close_file_reader(); + + /** + * Buffers the entire contents of a file into a string + * and returns it. + * + * @param string $path The path to the file. + * @return string|false The contents of the file or false if the file does not exist. 
+ */ + public function read_file($path) { + $this->start_streaming_file($path); + $body = ''; + while($this->next_file_chunk()) { + $chunk = $this->get_file_chunk(); + if($chunk === false) { + return false; + } + $body .= $chunk; + } + $this->close_file_reader(); + return $body; + } + +} diff --git a/src/WordPress/Filesystem/WP_File_Visitor_Event.php b/src/WordPress/Filesystem/WP_File_Visitor_Event.php new file mode 100644 index 00000000..176d5795 --- /dev/null +++ b/src/WordPress/Filesystem/WP_File_Visitor_Event.php @@ -0,0 +1,32 @@ + + */ + public $files; + + const EVENT_ENTER = 'entering'; + const EVENT_EXIT = 'exiting'; + + public function __construct( $type, $dir, $files = array() ) { + $this->type = $type; + $this->dir = $dir; + $this->files = $files; + } + + public function is_entering() { + return $this->type === self::EVENT_ENTER; + } + + public function is_exiting() { + return $this->type === self::EVENT_EXIT; + } +} diff --git a/src/WordPress/Filesystem/WP_Filesystem.php b/src/WordPress/Filesystem/WP_Filesystem.php new file mode 100644 index 00000000..6373c164 --- /dev/null +++ b/src/WordPress/Filesystem/WP_Filesystem.php @@ -0,0 +1,74 @@ +last_file_reader) { + $this->last_file_reader->close(); + } + $this->last_file_reader = \WordPress\ByteReader\WP_File_Reader::create($path); + return $this->last_file_reader->next_bytes(); + } + + public function next_file_chunk() { + return $this->last_file_reader->next_bytes(); + } + + public function get_file_chunk() { + return $this->last_file_reader->get_bytes(); + } + + public function get_error_message() { + return $this->last_file_reader->get_last_error(); + } + + public function close_file_reader() { + if($this->last_file_reader) { + $this->last_file_reader->close(); + $this->last_file_reader = null; + } + } + +} diff --git a/src/WordPress/Filesystem/WP_Filesystem_Visitor.php b/src/WordPress/Filesystem/WP_Filesystem_Visitor.php new file mode 100644 index 00000000..e6403bed --- /dev/null +++ 
b/src/WordPress/Filesystem/WP_Filesystem_Visitor.php @@ -0,0 +1,91 @@ +filesystem = $filesystem; + $this->root_dir = $dir; + $this->iterator_stack[] = $this->create_iterator( $dir ); + } + + public function get_current_depth() { + return $this->depth; + } + + public function get_root_dir() { + return $this->root_dir; + } + + public function next() { + while ( ! empty( $this->iterator_stack ) ) { + $this->current_iterator = end( $this->iterator_stack ); + + if ( ! $this->current_iterator->valid() ) { + array_pop( $this->iterator_stack ); + continue; + } + $current = $this->current_iterator->current(); + $this->current_iterator->next(); + + if ( ! ( $current instanceof WP_File_Visitor_Event ) ) { + // It's a directory path, push a new iterator onto the stack + $this->iterator_stack[] = $this->create_iterator( $current ); + continue; + } + + if ( $current->is_entering() ) { + ++$this->depth; + } + $this->current_event = $current; + if ( $current->is_exiting() ) { + --$this->depth; + } + return true; + } + + return false; + } + + public function get_event(): ?WP_File_Visitor_Event { + return $this->current_event; + } + + private function create_iterator( $dir ) { + $this->directories = array(); + $this->files = array(); + + $filesystem = $this->filesystem; + $children = $filesystem->ls($dir); + if ( $children === false ) { + return new \ArrayIterator( array() ); + } + + foreach($children as $child) { + if ( $filesystem->is_dir( $dir . '/' . $child ) ) { + $this->directories[] = $child; + continue; + } + $this->files[] = $child; + } + + $events = array(); + $events[] = new WP_File_Visitor_Event( WP_File_Visitor_Event::EVENT_ENTER, $dir, $this->files ); + $prefix = $dir === '/' ? '' : $dir; + foreach ( $this->directories as $directory ) { + $events[] = $prefix . '/' . 
$directory; // Placeholder for recursion + } + $events[] = new WP_File_Visitor_Event( WP_File_Visitor_Event::EVENT_EXIT, $dir ); + return new \ArrayIterator( $events ); + } + +} diff --git a/src/WordPress/Filesystem/WP_Zip_Filesystem.php b/src/WordPress/Filesystem/WP_Zip_Filesystem.php new file mode 100644 index 00000000..d7caa708 --- /dev/null +++ b/src/WordPress/Filesystem/WP_Zip_Filesystem.php @@ -0,0 +1,270 @@ +zip = new ZipStreamReader($byte_reader); + $this->byte_reader = $byte_reader; + } + + public function ls($parent = '/') { + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + + $descendants = $this->central_directory; + + // Only keep the descendants of the given parent. + $parent = trim($parent, '/') ; + $prefix = $parent ? $parent . '/' : ''; + if(strlen($prefix) > 1) { + $filtered_descendants = []; + foreach($descendants as $entry) { + $path = $entry['path']; + if(strpos($path, $prefix) !== 0) { + continue; + } + $filtered_descendants[] = $entry; + } + $descendants = $filtered_descendants; + } + + // Only keep the direct children of the parent. + $children = []; + foreach($descendants as $entry) { + $suffix = substr($entry['path'], strlen($prefix)); + if(str_contains($suffix, '/')) { + continue; + } + // No need to include the directory itself. 
+ if(strlen($suffix) === 0) { + continue; + } + $children[] = $suffix; + } + return $children; + } + + public function is_dir($path) { + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + $path = trim($path, '/'); + return isset($this->central_directory[$path]) && self::TYPE_DIR === $this->central_directory[$path]['type']; + } + + public function is_file($path) { + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + $path = trim($path, '/'); + return isset($this->central_directory[$path]) && self::TYPE_FILE === $this->central_directory[$path]['type']; + } + + public function start_streaming_file($path) { + $this->opened_file_finished = false; + $this->file_chunk = null; + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + $path = trim($path, '/'); + if(!isset($this->central_directory[$path])) { + _doing_it_wrong( + __METHOD__, + sprintf('File %s not found', $path), + '1.0.0' + ); + return false; + } + if(self::TYPE_FILE !== $this->central_directory[$path]['type']) { + _doing_it_wrong( + __METHOD__, + sprintf('Path %s is not a file', $path), + '1.0.0' + ); + return false; + } + return $this->zip->seek_to_record($this->central_directory[$path]['firstByteAt']); + } + + public function next_file_chunk() { + if ( $this->state === self::STATE_ERROR ) { + return false; + } + if ( $this->opened_file_finished ) { + $this->file_chunk = null; + return false; + } + if ( false === $this->zip->next() ) { + return false; + } + if ( NewZipStreamReader::STATE_FILE_ENTRY !== $this->zip->get_state() ) { + return false; + } + $this->file_chunk = $this->zip->get_file_body_chunk(); + if($this->zip->count_remaining_file_body_bytes() === 0) { + $this->opened_file_finished = true; + } + return true; + } + + public function get_file_chunk(): string { 
+ return $this->file_chunk ?? ''; + } + + public function get_error_message() { + return $this->error_message; + } + + private function load_central_directory() { + if($this->state === self::STATE_ERROR) { + return false; + } + if(null !== $this->central_directory) { + return true; + } + + if($this->central_directory_size() >= self::MAX_CENTRAL_DIRECTORY_SIZE) { + return false; + } + + // Read the central directory into memory. + if(false === $this->seek_to_central_directory_index()) { + return false; + } + + $central_directory = array(); + while($this->zip->next()) { + if(NewZipStreamReader::STATE_CENTRAL_DIRECTORY_ENTRY !== $this->zip->get_state()) { + continue; + } + $central_directory[] = $this->zip->get_header(); + } + + // Transform the central directory into a tree structure with + // directories and files. + foreach($central_directory as $entry) { + /** + * Directory are sometimes indicated by a path + * ending with a right trailing slash. Let's remove it + * to avoid an empty entry at the end of $path_segments. + */ + $path_segments = explode('/', $entry['path']); + + for($i=0; $i < count($path_segments)-1; $i++) { + $path_so_far = implode('/', array_slice($path_segments, 0, $i + 1)); + if(isset($this->central_directory[$path_so_far])) { + if(self::TYPE_DIR !== $this->central_directory[$path_so_far]['type']) { + $this->set_error('Path stored both as a file and a directory: ' . $path_so_far); + return false; + } + } + $this->central_directory[$path_so_far] = array( + 'path' => $path_so_far, + 'type' => self::TYPE_DIR, + ); + } + /** + * Only create a file entry if it's not a directory. 
+ */ + if(!str_ends_with($entry['path'], '/')) { + $this->central_directory[$entry['path']] = $entry; + $this->central_directory[$entry['path']]['type'] = self::TYPE_FILE; + } + } + + return true; + } + + private function set_error($message) { + $this->state = self::STATE_ERROR; + $this->error_message = $message; + } + + private function central_directory_size() { + if(false === $this->collect_central_directory_end_header()) { + return false; + } + + return $this->central_directory_end_header['centralDirectorySize']; + } + + private function seek_to_central_directory_index() + { + if(false === $this->collect_central_directory_end_header()) { + return false; + } + + return $this->zip->seek_to_record($this->central_directory_end_header['centralDirectoryOffset']); + } + + private function collect_central_directory_end_header() { + if( null !== $this->central_directory_end_header ) { + return true; + } + + $length = $this->byte_reader->length(); + if(true !== $this->zip->seek_to_record($length - 22)) { + return false; + } + if(true !== $this->zip->next()) { + return false; + } + if($this->zip->get_state() !== NewZipStreamReader::STATE_END_CENTRAL_DIRECTORY_ENTRY) { + return false; + } + + $this->central_directory_end_header = $this->zip->get_header(); + return true; + } + + public function close_file_reader() { + return true; + } + +} diff --git a/src/WordPress/Zip/NewZipStreamReader.php b/src/WordPress/Zip/NewZipStreamReader.php deleted file mode 100644 index b49552b2..00000000 --- a/src/WordPress/Zip/NewZipStreamReader.php +++ /dev/null @@ -1,357 +0,0 @@ - $this->file_path, - 'zip_file_bytes_parsed_so_far' => $this->zip_file_bytes_parsed_so_far, - 'file_entry_body_bytes_parsed_so_far' => $this->file_entry_body_bytes_parsed_so_far, - 'state' => $this->state, - 'header' => $this->header, - 'file_body_chunk' => $this->file_body_chunk, - 'paused_incomplete_input' => $this->paused_incomplete_input, - ]; - } - - public function resume($paused) { - $this->file_path = 
$paused['file_path']; - $this->zip_file_bytes_parsed_so_far = 0; - $this->state = $paused['state']; - $this->header = $paused['header']; - $this->file_body_chunk = $paused['file_body_chunk']; - $this->paused_incomplete_input = $paused['paused_incomplete_input']; - - $this->fp = fopen($this->file_path, 'rb'); - if($paused['file_entry_body_bytes_parsed_so_far'] > 0) { - $this->inflate_handle = inflate_init(ZLIB_ENCODING_RAW); - $file_starts_at = $paused['zip_file_bytes_parsed_so_far'] - $paused['file_entry_body_bytes_parsed_so_far']; - $this->zip_file_bytes_parsed_so_far = $file_starts_at; - fseek($this->fp, $file_starts_at); - while(true) { - $missing_bytes = $paused['file_entry_body_bytes_parsed_so_far'] - $this->file_entry_body_bytes_parsed_so_far; - $missing_bytes = max(0, min(4096, $missing_bytes)); - if($missing_bytes === 0) { - break; - } - $this->read_file_entry_body_chunk($missing_bytes); - } - } else { - $this->zip_file_bytes_parsed_so_far = $paused['zip_file_bytes_parsed_so_far']; - fseek($this->fp, $this->zip_file_bytes_parsed_so_far); - } - } - - public function __construct($file_path) { - $this->file_path = $file_path; - } - - public function is_paused_at_incomplete_input(): bool { - return $this->paused_incomplete_input; - } - - public function is_finished(): bool - { - return self::STATE_COMPLETE === $this->state || self::STATE_ERROR === $this->state; - } - - public function get_state() - { - return $this->state; - } - - public function get_header() - { - return $this->header; - } - - public function get_file_path() - { - if(!$this->header) { - return null; - } - - return $this->header['path']; - } - - public function get_file_body_chunk() - { - return $this->file_body_chunk; - } - - public function get_last_error(): ?string - { - return $this->error_message; - } - - public function next() - { - do { - if(self::STATE_SCAN === $this->state) { - if(false === $this->scan()) { - return false; - } - } - - switch ($this->state) { - case self::STATE_ERROR: - 
case self::STATE_COMPLETE: - return false; - - case self::STATE_FILE_ENTRY: - if (false === $this->read_file_entry()) { - return false; - } - break; - - case self::STATE_CENTRAL_DIRECTORY_ENTRY: - if (false === $this->read_central_directory_entry()) { - return false; - } - break; - - case self::STATE_END_CENTRAL_DIRECTORY_ENTRY: - if (false === $this->read_end_central_directory_entry()) { - return false; - } - break; - - default: - return false; - } - } while (self::STATE_SCAN === $this->state); - - return true; - } - - private function read_central_directory_entry() - { - if ($this->header && !empty($this->header['path'])) { - $this->header = null; - $this->state = self::STATE_SCAN; - return; - } - - if (!$this->header) { - $data = $this->consume_bytes(42); - if ($data === false) { - $this->paused_incomplete_input = true; - return false; - } - $this->header = unpack( - 'vversionCreated/vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength/vfileCommentLength/vdiskNumber/vinternalAttributes/VexternalAttributes/VfirstByteAt', - $data - ); - } - - if($this->header) { - $n = $this->header['pathLength'] + $this->header['extraLength'] + $this->header['fileCommentLength']; - $this->header['path'] = $this->consume_bytes($this->header['pathLength']); - $this->header['extra'] = $this->consume_bytes($this->header['extraLength']); - $this->header['fileComment'] = $this->consume_bytes($this->header['fileCommentLength']); - if(!$this->header['path']) { - $this->set_error('Empty path in central directory entry'); - } - } - } - - private function read_end_central_directory_entry() - { - if ($this->header && ( !empty($this->header['comment']) || 0 === $this->header['commentLength'] )) { - $this->header = null; - $this->state = self::STATE_SCAN; - return; - } - - if(!$this->header) { - $data = $this->consume_bytes(18); - if ($data === false) { - $this->paused_incomplete_input = true; - 
return false; - } - $this->header = unpack( - 'vdiskNumber/vcentralDirectoryStartDisk/vnumberCentralDirectoryRecordsOnThisDisk/vnumberCentralDirectoryRecords/VcentralDirectorySize/VcentralDirectoryOffset/vcommentLength', - $data - ); - } - - if($this->header && empty($this->header['comment']) && $this->header['commentLength'] > 0) { - $comment = $this->consume_bytes($this->header['commentLength']); - if(false === $comment) { - $this->paused_incomplete_input = true; - return false; - } - $this->header['comment'] = $comment; - } - } - - private function scan() { - $signature = $this->consume_bytes(4); - if ($signature === false || 0 === strlen($signature)) { - $this->paused_incomplete_input = true; - return false; - } - $signature = unpack('V', $signature)[1]; - switch($signature) { - case self::SIGNATURE_FILE: - $this->state = self::STATE_FILE_ENTRY; - break; - case self::SIGNATURE_CENTRAL_DIRECTORY: - $this->state = self::STATE_CENTRAL_DIRECTORY_ENTRY; - break; - case self::SIGNATURE_CENTRAL_DIRECTORY_END: - $this->state = self::STATE_END_CENTRAL_DIRECTORY_ENTRY; - break; - default: - $this->set_error('Invalid signature ' . $signature); - return false; - } - } - - /** - * Reads a file entry from a zip file. - * - * The file entry is structured as follows: - * - * ``` - * Offset Bytes Description - * 0 4 Local file header signature = 0x04034b50 (PK♥♦ or "PK\3\4") - * 4 2 Version needed to extract (minimum) - * 6 2 General purpose bit flag - * 8 2 Compression method; e.g. 
none = 0, DEFLATE = 8 (or "\0x08\0x00") - * 10 2 File last modification time - * 12 2 File last modification date - * 14 4 CRC-32 of uncompressed data - * 18 4 Compressed size (or 0xffffffff for ZIP64) - * 22 4 Uncompressed size (or 0xffffffff for ZIP64) - * 26 2 File name length (n) - * 28 2 Extra field length (m) - * 30 n File name - * 30+n m Extra field - * ``` - * - * @param resource $stream - */ - private function read_file_entry() - { - if(false === $this->read_file_entry_header()) { - return false; - } - if(false === $this->read_file_entry_body_chunk()) { - return false; - } - } - - private function read_file_entry_header() { - if (null === $this->header) { - $data = $this->consume_bytes(26); - if ($data === false) { - $this->paused_incomplete_input = true; - return false; - } - $this->header = unpack( - 'vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength', - $data - ); - $this->file_entry_body_bytes_parsed_so_far = 0; - } - - if($this->header && empty($this->header['path'])) { - $this->header['path'] = $this->consume_bytes($this->header['pathLength']); - $this->header['extra'] = $this->consume_bytes($this->header['extraLength']); - if($this->header['compressionMethod'] === self::COMPRESSION_DEFLATE) { - $this->inflate_handle = inflate_init(ZLIB_ENCODING_RAW); - } - } - } - - private function read_file_entry_body_chunk($max_bytes_to_read=4096) { - $this->file_body_chunk = null; - - $file_body_bytes_left = $this->header['compressedSize'] - $this->file_entry_body_bytes_parsed_so_far; - if($file_body_bytes_left === 0) { - $this->header = null; - $this->inflate_handle = null; - $this->file_entry_body_bytes_parsed_so_far = 0; - $this->state = self::STATE_SCAN; - return; - } - - $chunk_size = min($max_bytes_to_read, $file_body_bytes_left); - $compressed_bytes = $this->consume_bytes($chunk_size); - $this->file_entry_body_bytes_parsed_so_far += 
strlen($compressed_bytes); - - if ($this->header['compressionMethod'] === self::COMPRESSION_DEFLATE) { - $uncompressed_bytes = inflate_add($this->inflate_handle, $compressed_bytes, ZLIB_PARTIAL_FLUSH); - if ( $uncompressed_bytes === false || inflate_get_status( $this->inflate_handle ) === false ) { - $this->set_error('Failed to inflate'); - return false; - } - } else { - $uncompressed_bytes = $compressed_bytes; - } - - $this->file_body_chunk = $uncompressed_bytes; - } - - private function set_error($message) { - $this->state = self::STATE_ERROR; - $this->error_message = $message; - $this->paused_incomplete_input = false; - } - - private function consume_bytes($n) { - if(0 === $n) { - return ''; - } - if(null === $this->fp) { - $this->fp = fopen($this->file_path, 'rb'); - } - - $this->zip_file_bytes_parsed_so_far += $n; - $bytes_read = fread($this->fp, $n); - if(false === $bytes_read || '' === $bytes_read) { - fclose($this->fp); - $this->state = self::STATE_COMPLETE; - return false; - } - return $bytes_read; - } - -} diff --git a/src/WordPress/Zip/WP_Zip_Filesystem.php b/src/WordPress/Zip/WP_Zip_Filesystem.php new file mode 100644 index 00000000..d55f9030 --- /dev/null +++ b/src/WordPress/Zip/WP_Zip_Filesystem.php @@ -0,0 +1,271 @@ +zip = new ZipStreamReader($byte_reader); + $this->byte_reader = $byte_reader; + } + + public function ls($parent = '/') { + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + + $descendants = $this->central_directory; + + // Only keep the descendants of the given parent. + $parent = trim($parent, '/') ; + $prefix = $parent ? $parent . '/' : ''; + if(strlen($prefix) > 1) { + $filtered_descendants = []; + foreach($descendants as $entry) { + $path = $entry['path']; + if(strpos($path, $prefix) !== 0) { + continue; + } + $filtered_descendants[] = $entry; + } + $descendants = $filtered_descendants; + } + + // Only keep the direct children of the parent. 
+ $children = []; + foreach($descendants as $entry) { + $suffix = substr($entry['path'], strlen($prefix)); + if(str_contains($suffix, '/')) { + continue; + } + // No need to include the directory itself. + if(strlen($suffix) === 0) { + continue; + } + $children[] = $suffix; + } + return $children; + } + + public function is_dir($path) { + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + $path = trim($path, '/'); + return isset($this->central_directory[$path]) && self::TYPE_DIR === $this->central_directory[$path]['type']; + } + + public function is_file($path) { + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + $path = trim($path, '/'); + return isset($this->central_directory[$path]) && self::TYPE_FILE === $this->central_directory[$path]['type']; + } + + public function start_streaming_file($path) { + $this->opened_file_finished = false; + $this->file_chunk = null; + if($this->state === self::STATE_ERROR) { + return false; + } + if(false === $this->load_central_directory()) { + return false; + } + $path = trim($path, '/'); + if(!isset($this->central_directory[$path])) { + _doing_it_wrong( + __METHOD__, + sprintf('File %s not found', $path), + '1.0.0' + ); + return false; + } + if(self::TYPE_FILE !== $this->central_directory[$path]['type']) { + _doing_it_wrong( + __METHOD__, + sprintf('Path %s is not a file', $path), + '1.0.0' + ); + return false; + } + return $this->zip->seek_to_record($this->central_directory[$path]['firstByteAt']); + } + + public function next_file_chunk() { + if ( $this->state === self::STATE_ERROR ) { + return false; + } + if ( $this->opened_file_finished ) { + $this->file_chunk = null; + return false; + } + if ( false === $this->zip->next() ) { + return false; + } + if ( ZipStreamReader::STATE_FILE_ENTRY !== $this->zip->get_state() ) { + return false; + } + $this->file_chunk = 
$this->zip->get_file_body_chunk(); + if($this->zip->count_remaining_file_body_bytes() === 0) { + $this->opened_file_finished = true; + } + return true; + } + + public function get_file_chunk(): string { + return $this->file_chunk ?? ''; + } + + public function get_error_message() { + return $this->error_message; + } + + private function load_central_directory() { + if($this->state === self::STATE_ERROR) { + return false; + } + if(null !== $this->central_directory) { + return true; + } + + if($this->central_directory_size() >= self::MAX_CENTRAL_DIRECTORY_SIZE) { + return false; + } + + // Read the central directory into memory. + if(false === $this->seek_to_central_directory_index()) { + return false; + } + + $central_directory = array(); + while($this->zip->next()) { + if(ZipStreamReader::STATE_CENTRAL_DIRECTORY_ENTRY !== $this->zip->get_state()) { + continue; + } + $central_directory[] = $this->zip->get_header(); + } + + // Transform the central directory into a tree structure with + // directories and files. + foreach($central_directory as $entry) { + /** + * Directory are sometimes indicated by a path + * ending with a right trailing slash. Let's remove it + * to avoid an empty entry at the end of $path_segments. + */ + $path_segments = explode('/', $entry['path']); + + for($i=0; $i < count($path_segments)-1; $i++) { + $path_so_far = implode('/', array_slice($path_segments, 0, $i + 1)); + if(isset($this->central_directory[$path_so_far])) { + if(self::TYPE_DIR !== $this->central_directory[$path_so_far]['type']) { + $this->set_error('Path stored both as a file and a directory: ' . $path_so_far); + return false; + } + } + $this->central_directory[$path_so_far] = array( + 'path' => $path_so_far, + 'type' => self::TYPE_DIR, + ); + } + /** + * Only create a file entry if it's not a directory. 
+ */ + if(!str_ends_with($entry['path'], '/')) { + $this->central_directory[$entry['path']] = $entry; + $this->central_directory[$entry['path']]['type'] = self::TYPE_FILE; + } + } + + return true; + } + + private function set_error($message) { + $this->state = self::STATE_ERROR; + $this->error_message = $message; + } + + private function central_directory_size() { + if(false === $this->collect_central_directory_end_header()) { + return false; + } + + return $this->central_directory_end_header['centralDirectorySize']; + } + + private function seek_to_central_directory_index() + { + if(false === $this->collect_central_directory_end_header()) { + return false; + } + + return $this->zip->seek_to_record($this->central_directory_end_header['centralDirectoryOffset']); + } + + private function collect_central_directory_end_header() { + if( null !== $this->central_directory_end_header ) { + return true; + } + + $length = $this->byte_reader->length(); + if(true !== $this->zip->seek_to_record($length - 22)) { + return false; + } + if(true !== $this->zip->next()) { + return false; + } + if($this->zip->get_state() !== ZipStreamReader::STATE_END_CENTRAL_DIRECTORY_ENTRY) { + return false; + } + + $this->central_directory_end_header = $this->zip->get_header(); + return true; + } + + public function close_file_reader() { + return true; + } + +} diff --git a/src/WordPress/Zip/ZipStreamReader.php b/src/WordPress/Zip/ZipStreamReader.php index daf087b5..a6b8437d 100644 --- a/src/WordPress/Zip/ZipStreamReader.php +++ b/src/WordPress/Zip/ZipStreamReader.php @@ -2,6 +2,11 @@ namespace WordPress\Zip; +use WordPress\ByteReader\WP_Byte_Reader; + +/** + * + */ class ZipStreamReader { const SIGNATURE_FILE = 0x04034b50; @@ -9,28 +14,217 @@ class ZipStreamReader { const SIGNATURE_CENTRAL_DIRECTORY_END = 0x06054b50; const COMPRESSION_DEFLATE = 8; - /** - * Reads the next zip entry from a stream of zip file bytes. - * - * @param resource $fp A stream of zip file bytes. 
- */ - public static function readEntry( $fp ) { - $signature = static::read_bytes( $fp, 4 ); - if ( $signature === false ) { - return null; + private $state = ZipStreamReader::STATE_SCAN; + private $header = null; + private $file_body_chunk = null; + private $paused_incomplete_input = false; + private $error_message; + private $inflate_handle; + private $last_record_at = null; + private $byte_reader; + private $byte_buffer = ''; + private $file_bytes_consumed_so_far = 0; + private $file_entry_body_bytes_parsed_so_far = 0; + + const STATE_SCAN = 'scan'; + const STATE_FILE_ENTRY = 'file-entry'; + const STATE_CENTRAL_DIRECTORY_ENTRY = 'central-directory-entry'; + const STATE_CENTRAL_DIRECTORY_ENTRY_EXTRA = 'central-directory-entry-extra'; + const STATE_END_CENTRAL_DIRECTORY_ENTRY = 'end-central-directory-entry'; + const STATE_END_CENTRAL_DIRECTORY_ENTRY_EXTRA = 'end-central-directory-entry-extra'; + const STATE_COMPLETE = 'complete'; + const STATE_ERROR = 'error'; + + public function __construct(WP_Byte_Reader $byte_reader) { + $this->byte_reader = $byte_reader; + } + + public function is_paused_at_incomplete_input(): bool { + return $this->paused_incomplete_input; + } + + public function is_finished(): bool + { + return self::STATE_COMPLETE === $this->state || self::STATE_ERROR === $this->state; + } + + public function get_state() + { + return $this->state; + } + + public function get_header() + { + return $this->header; + } + + public function get_file_path() + { + if(!$this->header) { + return null; + } + + return $this->header['path']; + } + + public function get_file_body_chunk() + { + return $this->file_body_chunk; + } + + public function count_remaining_file_body_bytes() { + return $this->header['compressedSize'] - $this->file_entry_body_bytes_parsed_so_far; + } + + public function get_last_error(): ?string + { + return $this->error_message; + } + + public function next() + { + do { + if(self::STATE_SCAN === $this->state) { + if(false === $this->scan()) { + 
return false; + } + } + + switch ($this->state) { + case self::STATE_ERROR: + case self::STATE_COMPLETE: + return false; + + case self::STATE_FILE_ENTRY: + if (false === $this->read_file_entry()) { + return false; + } + break; + + case self::STATE_CENTRAL_DIRECTORY_ENTRY: + if (false === $this->read_central_directory_entry()) { + return false; + } + break; + + case self::STATE_END_CENTRAL_DIRECTORY_ENTRY: + if (false === $this->read_end_central_directory_entry()) { + return false; + } + break; + + default: + return false; + } + } while (self::STATE_SCAN === $this->state); + + return true; + } + + public function seek_to_record($record_offset) { + $this->after_record(); + if( false === $this->byte_reader->seek($record_offset) ) { + return false; + } + $this->byte_buffer = ''; + $this->file_bytes_consumed_so_far = $record_offset; + return true; + } + + public function tell() { + return $this->last_record_at; + } + + private function after_record() { + $this->state = self::STATE_SCAN; + $this->header = null; + // @TODO: Does the inflate_handle need an fclose() or so call? + $this->inflate_handle = null; + $this->file_body_chunk = null; + $this->file_entry_body_bytes_parsed_so_far = 0; + } + + private function read_central_directory_entry() + { + if ($this->header && ! 
empty($this->header['path'])) { + $this->after_record(); + return; } - $signature = unpack( 'V', $signature )[1]; - if ( $signature === static::SIGNATURE_FILE ) { - return static::readFileEntry( $fp ); - } elseif ( $signature === static::SIGNATURE_CENTRAL_DIRECTORY ) { - return static::readCentralDirectoryEntry( $fp, true ); - } elseif ( $signature === static::SIGNATURE_CENTRAL_DIRECTORY_END ) { - return static::readEndCentralDirectoryEntry( $fp, true ); + + if (!$this->header) { + $data = $this->consume_bytes(42); + if ($data === false) { + $this->paused_incomplete_input = true; + return false; + } + $this->header = unpack( + 'vversionCreated/vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength/vfileCommentLength/vdiskNumber/vinternalAttributes/VexternalAttributes/VfirstByteAt', + $data + ); } - return null; + if($this->header) { + $this->header['path'] = $this->sanitize_path($this->consume_bytes($this->header['pathLength'])); + $this->header['extra'] = $this->consume_bytes($this->header['extraLength']); + $this->header['fileComment'] = $this->consume_bytes($this->header['fileCommentLength']); + if(!$this->header['path']) { + $this->set_error('Empty path in central directory entry'); + } + } } + private function read_end_central_directory_entry() + { + if ($this->header && ( !empty($this->header['comment']) || 0 === $this->header['commentLength'] )) { + $this->after_record(); + return; + } + + if(!$this->header) { + $data = $this->consume_bytes(18); + if ($data === false) { + $this->paused_incomplete_input = true; + return false; + } + $this->header = unpack( + 'vdiskNumber/vcentralDirectoryStartDisk/vnumberCentralDirectoryRecordsOnThisDisk/vnumberCentralDirectoryRecords/VcentralDirectorySize/VcentralDirectoryOffset/vcommentLength', + $data + ); + } + + if($this->header && empty($this->header['comment']) && $this->header['commentLength'] > 0) { + $comment = 
$this->consume_bytes($this->header['commentLength']); + if(false === $comment) { + $this->paused_incomplete_input = true; + return false; + } + $this->header['comment'] = $comment; + } + } + + private function scan() { + $this->last_record_at = $this->file_bytes_consumed_so_far; + $signature = $this->consume_bytes(4); + if ($signature === false || 0 === strlen($signature)) { + $this->paused_incomplete_input = true; + return false; + } + $signature = unpack('V', $signature)[1]; + switch($signature) { + case self::SIGNATURE_FILE: + $this->state = self::STATE_FILE_ENTRY; + break; + case self::SIGNATURE_CENTRAL_DIRECTORY: + $this->state = self::STATE_CENTRAL_DIRECTORY_ENTRY; + break; + case self::SIGNATURE_CENTRAL_DIRECTORY_END: + $this->state = self::STATE_END_CENTRAL_DIRECTORY_ENTRY; + break; + default: + $this->set_error('Invalid signature ' . $signature); + return false; + } + } /** * Reads a file entry from a zip file. @@ -56,164 +250,123 @@ public static function readEntry( $fp ) { * * @param resource $stream */ - protected static function readFileEntry( $stream ): ZipFileEntry { - $data = self::read_bytes( $stream, 26 ); - $data = unpack( - 'vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength', - $data - ); - $path = self::read_bytes( $stream, $data['pathLength'] ); - $extra = self::read_bytes( $stream, $data['extraLength'] ); - $bytes = self::read_bytes( $stream, $data['compressedSize'] ); - - if ( $data['compressionMethod'] === static::COMPRESSION_DEFLATE ) { - try { - $bytes = gzinflate( $bytes ); - } catch ( \Throwable $e ) { - // Ignore the error + private function read_file_entry() + { + if(false === $this->read_file_entry_header()) { + return false; + } + if(false === $this->read_file_entry_body_chunk()) { + return false; + } + } + + private function read_file_entry_header() { + if (null === $this->header) { + $data = $this->consume_bytes(26); + if ($data === 
false) { + $this->paused_incomplete_input = true; + return false; + } + $this->header = unpack( + 'vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength', + $data + ); + $this->file_entry_body_bytes_parsed_so_far = 0; + } + + if($this->header && empty($this->header['path'])) { + $this->header['path'] = $this->sanitize_path($this->consume_bytes($this->header['pathLength'])); + $this->header['extra'] = $this->consume_bytes($this->header['extraLength']); + if($this->header['compressionMethod'] === self::COMPRESSION_DEFLATE) { + $this->inflate_handle = inflate_init(ZLIB_ENCODING_RAW); + } + } + } + + private function read_file_entry_body_chunk($max_bytes_to_read=4096) { + $this->file_body_chunk = null; + + $file_body_bytes_left = $this->header['compressedSize'] - $this->file_entry_body_bytes_parsed_so_far; + if($file_body_bytes_left === 0) { + $this->after_record(); + return; + } + + $chunk_size = min($max_bytes_to_read, $file_body_bytes_left); + $compressed_bytes = $this->consume_bytes($chunk_size); + $this->file_entry_body_bytes_parsed_so_far += strlen($compressed_bytes); + + if ($this->header['compressionMethod'] === self::COMPRESSION_DEFLATE) { + if(!$this->inflate_handle) { + $this->inflate_handle = inflate_init(ZLIB_ENCODING_RAW); + } + $uncompressed_bytes = inflate_add($this->inflate_handle, $compressed_bytes, ZLIB_PARTIAL_FLUSH); + if ( $uncompressed_bytes === false || inflate_get_status( $this->inflate_handle ) === false ) { + $this->set_error('Failed to inflate'); + return false; } + } else { + $uncompressed_bytes = $compressed_bytes; } - return new ZipFileEntry( - $data['versionNeeded'], - $data['generalPurpose'], - $data['compressionMethod'], - $data['lastModifiedTime'], - $data['lastModifiedDate'], - $data['crc'], - $data['compressedSize'], - $data['uncompressedSize'], - $path, - $extra, - $bytes - ); + $this->file_body_chunk = $uncompressed_bytes; } - /** - * 
Reads a central directory entry from a zip file. - * - * The central directory entry is structured as follows: - * - * ``` - * Offset Bytes Description - * 0 4 Central directory file header signature = 0x02014b50 - * 4 2 Version made by - * 6 2 Version needed to extract (minimum) - * 8 2 General purpose bit flag - * 10 2 Compression method - * 12 2 File last modification time - * 14 2 File last modification date - * 16 4 CRC-32 of uncompressed data - * 20 4 Compressed size (or 0xffffffff for ZIP64) - * 24 4 Uncompressed size (or 0xffffffff for ZIP64) - * 28 2 File name length (n) - * 30 2 Extra field length (m) - * 32 2 File comment length (k) - * 34 2 Disk number where file starts (or 0xffff for ZIP64) - * 36 2 Internal file attributes - * 38 4 External file attributes - * 42 4 Relative offset of local file header (or 0xffffffff for ZIP64). This is the number of bytes between the start of the first disk on which the file occurs, and the start of the local file header. This allows software reading the central directory to locate the position of the file inside the ZIP file. 
- * 46 n File name - * 46+n m Extra field - * 46+n+m k File comment - * ``` - * - * @param resource stream - */ - protected static function readCentralDirectoryEntry( $stream ): ZipCentralDirectoryEntry { - $data = static::read_bytes( $stream, 42 ); - $data = unpack( - 'vversionCreated/vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength/vfileCommentLength/vdiskNumber/vinternalAttributes/VexternalAttributes/VfirstByteAt', - $data - ); - $path = static::read_bytes( $stream, $data['pathLength'] ); - $extra = static::read_bytes( $stream, $data['extraLength'] ); - $fileComment = static::read_bytes( $stream, $data['fileCommentLength'] ); - - return new ZipCentralDirectoryEntry( - $data['versionCreated'], - $data['versionNeeded'], - $data['generalPurpose'], - $data['compressionMethod'], - $data['lastModifiedTime'], - $data['lastModifiedDate'], - $data['crc'], - $data['compressedSize'], - $data['uncompressedSize'], - $data['diskNumber'], - $data['internalAttributes'], - $data['externalAttributes'], - $data['firstByteAt'], - $data['firstByteAt'] + 30 + $data['pathLength'] + $data['fileCommentLength'] + $data['extraLength'] + $data['compressionMethod'] - 1, - $path, - $extra, - $fileComment - ); + private function set_error($message) { + $this->state = self::STATE_ERROR; + $this->error_message = $message; + $this->paused_incomplete_input = false; } /** - * Reads the end of central directory entry from a zip file. + * Normalizes the parsed path to prevent directory traversal, + * a.k.a zip slip attacks. * - * The end of central directory entry is structured as follows: + * In ZIP, paths are arbitrary byte sequences. Nothing prevents + * a ZIP file from containing a path such as /etc/passwd or + * ../../../../etc/passwd. 
* - * ``` - * Offset Bytes Description[33] - * 0 4 End of central directory signature = 0x06054b50 - * 4 2 Number of this disk (or 0xffff for ZIP64) - * 6 2 Disk where central directory starts (or 0xffff for ZIP64) - * 8 2 Number of central directory records on this disk (or 0xffff for ZIP64) - * 10 2 Total number of central directory records (or 0xffff for ZIP64) - * 12 4 Size of central directory (bytes) (or 0xffffffff for ZIP64) - * 16 4 Offset of start of central directory, relative to start of archive (or 0xffffffff for ZIP64) - * 20 2 Comment length (n) - * 22 n Comment - * ``` - * - * @param resource $stream + * This function normalizes paths found in the ZIP file. + * + * @TODO: Scrutinize the implementation of this function. Consider + * unicode characters in the path, including ones that are + * just embellishments of the following character. Consider + * the impact of **all** seemingly "invalid" byte sequences, + * e.g. spaces, ASCII control characters, etc. What will the + * OS do when it receives a path containing .{null byte}./etc/passwd?
+ $path = preg_replace('#^(\.\./)+#', '', $path); + // Remove all the /./ and /../ segments. + $path = preg_replace('#/\.\.?/#', '/', $path); + return $path; } - /** - * Reads a fixed number of bytes from a stream. - * Unlike fread(), this function will block until enough bytes are available. - * - * @param $stream - * @param $length - * - * @return false|string - */ - protected static function read_bytes( $stream, $length ) { - if ( $length === 0 ) { + private function consume_bytes($n) { + if(0 === $n) { return ''; } - $data = ''; - $remaining_length = $length; - while ( $remaining_length > 0 ) { - $chunk = fread( $stream, $remaining_length ); - if ( false === $chunk || ( '' === $chunk && feof( $stream ) ) ) { - return strlen( $data ) ? $data : false; + if (strlen($this->byte_buffer) < $n) { + if (!$this->byte_reader->next_bytes()) { + if ($this->byte_reader->is_finished()) { + $this->state = self::STATE_COMPLETE; + } else { + $this->paused_incomplete_input = true; + } + return false; } - $remaining_length -= strlen( $chunk ); - $data .= $chunk; + $this->byte_buffer .= $this->byte_reader->get_bytes(); } - return $data; + $bytes = substr($this->byte_buffer, 0, $n); + $this->byte_buffer = substr($this->byte_buffer, $n); + $this->file_bytes_consumed_so_far += $n; + return $bytes; } + } + diff --git a/src/WordPress/Zip/functions.php b/src/WordPress/Zip/functions.php deleted file mode 100644 index 226111df..00000000 --- a/src/WordPress/Zip/functions.php +++ /dev/null @@ -1,36 +0,0 @@ -isFileEntry() ) { - continue; - } - - $path = Path::canonicalize( $to_path . '/' . $entry->path ); - $parent = Path::getDirectory( $path ); - if ( ! is_dir( $parent ) ) { - if(is_file($parent)) { - unlink($parent); - } - mkdir( $parent, 0777, true ); - } - - if ( $entry->isDirectory ) { - if ( ! is_dir( $path ) ) { - mkdir( $path, 0777, true ); - } - } else { - file_put_contents( $path, $entry->bytes ); - } - } - - return feof( $fp ) ? 
1 : 0; -} From 9f0fee1d05fd6273d50b66f9b2ef2c097e0723cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Mon, 16 Dec 2024 22:23:32 +0100 Subject: [PATCH 2/4] Turn WP_Remote_File_Ranged_Reader into a proper byte source --- src/WordPress/AsyncHttp/Request.php | 10 + src/WordPress/ByteReader/WP_Byte_Reader.php | 23 +- src/WordPress/ByteReader/WP_File_Reader.php | 4 +- .../WP_Remote_File_Ranged_Reader.php | 271 +++++++++--------- .../ByteReader/WP_Remote_File_Reader.php | 83 +++++- 5 files changed, 243 insertions(+), 148 deletions(-) diff --git a/src/WordPress/AsyncHttp/Request.php b/src/WordPress/AsyncHttp/Request.php index ddd21386..78e168ba 100644 --- a/src/WordPress/AsyncHttp/Request.php +++ b/src/WordPress/AsyncHttp/Request.php @@ -62,6 +62,16 @@ public function __construct( string $url, $request_info = array() ) { } } + public function get_request_info() { + return [ + 'http_version' => $this->http_version, + 'method' => $this->method, + 'headers' => $this->headers, + 'body_stream' => $this->upload_body_stream, + 'redirected_from' => $this->redirected_from, + ]; + } + public function latest_redirect() { $request = $this; while ( $request->redirected_to ) { diff --git a/src/WordPress/ByteReader/WP_Byte_Reader.php b/src/WordPress/ByteReader/WP_Byte_Reader.php index 77fdeeb6..dc47f733 100644 --- a/src/WordPress/ByteReader/WP_Byte_Reader.php +++ b/src/WordPress/ByteReader/WP_Byte_Reader.php @@ -5,12 +5,19 @@ /** * Experimental interface for streaming, seekable byte readers. 
*/ -interface WP_Byte_Reader { - public function length(): int; - public function tell(): int; - public function seek( int $offset ): bool; - public function is_finished(): bool; - public function next_bytes(): bool; - public function get_bytes(): ?string; - public function get_last_error(): ?string; +abstract class WP_Byte_Reader { + abstract public function length(); + abstract public function tell(): int; + abstract public function seek( int $offset ): bool; + abstract public function is_finished(): bool; + abstract public function next_bytes(): bool; + abstract public function get_bytes(): ?string; + abstract public function get_last_error(): ?string; + public function read_all(): string { + $buffer = ''; + while( $this->next_bytes() ) { + $buffer .= $this->get_bytes(); + } + return $buffer; + } } diff --git a/src/WordPress/ByteReader/WP_File_Reader.php b/src/WordPress/ByteReader/WP_File_Reader.php index 56a50f50..43da1893 100644 --- a/src/WordPress/ByteReader/WP_File_Reader.php +++ b/src/WordPress/ByteReader/WP_File_Reader.php @@ -2,7 +2,7 @@ namespace WordPress\ByteReader; -class WP_File_Reader implements WP_Byte_Reader { +class WP_File_Reader extends WP_Byte_Reader { const STATE_STREAMING = '#streaming'; const STATE_FINISHED = '#finished'; @@ -33,7 +33,7 @@ private function __construct( $file_path, $chunk_size ) { $this->chunk_size = $chunk_size; } - public function length(): int { + public function length(): ?int { return filesize( $this->file_path ); } diff --git a/src/WordPress/ByteReader/WP_Remote_File_Ranged_Reader.php b/src/WordPress/ByteReader/WP_Remote_File_Ranged_Reader.php index 341370c7..c8c72d9f 100644 --- a/src/WordPress/ByteReader/WP_Remote_File_Ranged_Reader.php +++ b/src/WordPress/ByteReader/WP_Remote_File_Ranged_Reader.php @@ -20,175 +20,186 @@ * var_dump($file->get_bytes()); * } * - * @TODO: Verify that the remote server supports range requests. - * @TODO: Support requesting multiple ranges in a single request. 
* @TODO: Abort in-progress requests when seeking to a new offset. */ -class WP_Remote_File_Ranged_Reader { +class WP_Remote_File_Ranged_Reader extends WP_Byte_Reader { - /** - * @var WordPress\AsyncHttp\Client - */ - private $client; private $url; private $remote_file_length; - private $current_request; - private $offset_in_remote_file = 0; - private $offset_in_current_chunk = 0; - private $current_chunk; - private $expected_chunk_size; + private $current_reader; + private $offset_in_remote_file = 0; + private $default_expected_chunk_size = 10 * 1024; // 10 KB + private $expected_chunk_size = 10 * 1024; // 10 KB + private $stop_after_chunk = false; + + /** + * Creates a seekable reader for the remote file. + * Detects support for range requests and falls back to saving the entire + * file to disk when the remote server does not support range requests. + */ + static public function create( $url ) { + $remote_file_reader = new WP_Remote_File_Ranged_Reader( $url ); + /** + * We don't **need** the content-length header to be present. + * + * However, this reader is only used to read remote ZIP files, + * we do need to know the length of the file to be able to read + * the central directory index. + * + * Let's revisit this check once we need to read other types of + * files. + */ + if(false === $remote_file_reader->length()) { + return self::save_to_disk( $url ); + } + + /** + * Try to read the first two bytes of the file to confirm that + * the remote server supports range requests. + */ + $remote_file_reader->seek_to_chunk(0, 2); + if(false === $remote_file_reader->next_bytes()) { + return self::save_to_disk( $url ); + } - public function __construct( $url, $options = array() ) { - $this->client = new \WordPress\AsyncHttp\Client(); - $this->url = $url; + $bytes = $remote_file_reader->get_bytes(); + if(strlen($bytes) !== 2) { + // Oops! We're streaming the entire file to disk now. 
Let's + // redirect the output to a local file and provide the caller + // with a regular file reader. + return self::redirect_output_to_disk( $remote_file_reader ); + } + + // The remote server supports range requests, good! We can use this reader. + // Let's return to the beginning of the file before returning. + $remote_file_reader->seek(0); + return $remote_file_reader; } - public function length(): int { - throw new \Exception( 'Not implemented yet.' ); + static private function save_to_disk( $url ) { + $remote_file_reader = new WP_Remote_File_Reader( $url ); + return self::redirect_output_to_disk( $remote_file_reader ); } - public function request_bytes( $bytes ) { - if ( null === $this->remote_file_length ) { - $content_length = $this->resolve_content_length(); - if ( false === $content_length ) { - // The remote server won't tell us what the content length is - // @TODO: What should we do in this case? Content-length is critical for - // stream-decompressing remote zip files, but we may not need it - // for other use-cases. + static private function redirect_output_to_disk( WP_Byte_Reader $reader ) { + $file_path = tempnam(sys_get_temp_dir(), 'wp-remote-file-reader-') . '.epub'; + $file = fopen($file_path, 'w'); + // We may have a bytes chunk available at this point. + if($reader->get_bytes()) { + fwrite($file, $reader->get_bytes()); + } + // Keep streaming the file until we're done. + while($reader->next_bytes()) { + fwrite($file, $reader->get_bytes()); + } + fclose($file); + if($reader->get_last_error()) { + // How should we log this error? + return false; + } + return WP_File_Reader::create( $file_path ); + } + + public function __construct( $url ) { + $this->url = $url; + } + + public function next_bytes(): bool { + while( true ) { + if ( null === $this->current_reader ) { + $this->create_reader(); + } + // Advance the offset by the length of the current chunk. 
+ if ( $this->current_reader->get_bytes() ) { + $this->offset_in_remote_file += strlen( $this->current_reader->get_bytes() ); + } + + // We've reached the end of the remote file, we're done. + if ( $this->offset_in_remote_file >= $this->length() - 1 ) { return false; } - $this->remote_file_length = $content_length; + + // We've reached the end of the current chunk, request the next one. + if ( false === $this->current_reader->next_bytes() ) { + if ( $this->stop_after_chunk ) { + return false; + } + $this->current_reader = null; + continue; + } + + // We've got a chunk, return it. + return true; } + } - if ( $this->offset_in_remote_file < 0 || $this->offset_in_remote_file + $bytes > $this->remote_file_length ) { - // TODO: Think through error handling + public function length() { + $this->ensure_content_length(); + if ( null === $this->remote_file_length ) { return false; } + return $this->remote_file_length; + } - $this->seek( $this->offset_in_remote_file ); - - $this->current_request = new \WordPress\AsyncHttp\Request( + private function create_reader() { + $this->current_reader = new WP_Remote_File_Reader( $this->url, array( 'headers' => array( - 'Range' => 'bytes=' . $this->offset_in_remote_file . '-' . ( $this->offset_in_remote_file + $bytes - 1 ), + // @TODO: Detect when the remote server doesn't support range requests, + // do something sensible. We could either stream the entire file, + // or give up. + 'Range' => 'bytes=' . $this->offset_in_remote_file . '-' . 
( + $this->offset_in_remote_file + $this->expected_chunk_size - 1 + ), ), ) ); - $this->expected_chunk_size = $bytes; - $this->offset_in_current_chunk = 0; - if ( false === $this->client->enqueue( $this->current_request ) ) { - // TODO: Think through error handling - return false; - } - return true; } - public function seek( $offset ) { + public function seek_to_chunk($offset, $length) { + $this->current_reader->seek($offset); + $this->expected_chunk_size = $length; + $this->stop_after_chunk = true; + } + + public function seek( $offset ): bool { $this->offset_in_remote_file = $offset; // @TODO cancel any pending requests - $this->current_request = null; + $this->current_reader = null; + $this->expected_chunk_size = $this->default_expected_chunk_size; + $this->stop_after_chunk = false; + return true; } - public function tell() { + public function tell(): int { return $this->offset_in_remote_file; } - public function resolve_content_length() { - if ( null !== $this->remote_file_length ) { - return $this->remote_file_length; - } - - $request = new \WordPress\AsyncHttp\Request( - $this->url, - array( 'method' => 'HEAD' ) - ); - if ( false === $this->client->enqueue( $request ) ) { - // TODO: Think through error handling - return false; - } - while ( $this->client->await_next_event() ) { - switch ( $this->client->get_event() ) { - case \WordPress\AsyncHttp\Client::EVENT_GOT_HEADERS: - $response = $request->response; - if ( false === $response ) { - return false; - } - $content_length = $response->get_header( 'Content-Length' ); - if ( false === $content_length ) { - return false; - } - return (int) $content_length; - } - } + public function is_finished(): bool { return false; } - public function next_chunk() { - while ( $this->client->await_next_event() ) { - /** - * Only process events related to the most recent request. - * @TODO: Support redirects. - * @TODO: Cleanup resources for stale requests. 
- */ - if ( $this->current_request->id !== $this->client->get_request()->id ) { - continue; - } + public function get_bytes(): ?string { + return $this->current_reader->get_bytes(); + } - if ( $this->offset_in_current_chunk >= $this->expected_chunk_size ) { - // The remote server doesn't support range requests and sent us a chunk larger than expected. - // @TODO: Handle this case. Should we stream the entire file, or give up? - // Should we cache the download locally, or request the entire file again every - // time we need to seek()? - return false; - } + public function get_last_error(): ?string { + // @TODO: Preserve the error information when the current reader + // is reset. + return $this->current_reader->get_last_error(); + } - switch ( $this->client->get_event() ) { - case \WordPress\AsyncHttp\Client::EVENT_GOT_HEADERS: - $request = $this->client->get_request(); - if ( ! $request ) { - return false; - } - $response = $request->response; - if ( false === $response ) { - return false; - } - if ( - $response->status_code !== 206 || - false === $response->get_header( 'Range' ) - ) { - // The remote server doesn't support range requests - // @TODO: Handle this case. Should we stream the entire file, or give up? - // Should we cache the download locally, or request the entire file again every - // time we need to seek()? - return false; - } - break; - case \WordPress\AsyncHttp\Client::EVENT_BODY_CHUNK_AVAILABLE: - $chunk = $this->client->get_response_body_chunk(); - if ( ! is_string( $chunk ) ) { - // TODO: Think through error handling - return false; - } - $this->current_chunk = $chunk; - $this->offset_in_remote_file += strlen( $chunk ); - $this->offset_in_current_chunk += strlen( $chunk ); - - return true; - case \WordPress\AsyncHttp\Client::EVENT_FAILED: - // TODO: Think through error handling. Errors are expected when working with - // the network. Should we auto retry? Make it easy for the caller to retry? - // Something else? 
- return false; - case \WordPress\AsyncHttp\Client::EVENT_FINISHED: - // TODO: Think through error handling - return false; - } + private function ensure_content_length() { + if ( null !== $this->remote_file_length ) { + return $this->remote_file_length; + } + if(null === $this->current_reader) { + $this->current_reader = new WP_Remote_File_Reader( $this->url ); } + $this->remote_file_length = $this->current_reader->length(); + return $this->remote_file_length; } - public function get_bytes() { - return $this->current_chunk; - } } diff --git a/src/WordPress/ByteReader/WP_Remote_File_Reader.php b/src/WordPress/ByteReader/WP_Remote_File_Reader.php index 156ddec5..3c7ab643 100644 --- a/src/WordPress/ByteReader/WP_Remote_File_Reader.php +++ b/src/WordPress/ByteReader/WP_Remote_File_Reader.php @@ -5,27 +5,26 @@ /** * Streams bytes from a remote file. */ -class WP_Remote_File_Reader implements WP_Byte_Reader { +class WP_Remote_File_Reader extends WP_Byte_Reader { /** * @var WordPress\AsyncHttp\Client */ private $client; private $url; + private $headers; private $request; private $current_chunk; private $last_error; private $is_finished = false; private $bytes_already_read; + private $remote_file_length; private $skip_bytes = 0; - public function __construct( $url ) { + public function __construct( $url, $headers = array() ) { $this->client = new \WordPress\AsyncHttp\Client(); $this->url = $url; - } - - public function length(): int { - throw new \Exception( 'Not implemented yet.' ); + $this->headers = $headers; } public function tell(): int { @@ -34,7 +33,12 @@ public function tell(): int { public function seek( $offset_in_file ): bool { if ( $this->request ) { - _doing_it_wrong( __METHOD__, 'Cannot set a remote file reader cursor on a remote file reader that is already initialized.', '1.0.0' ); + _doing_it_wrong( + __METHOD__, + 'Cannot seek() a WP_Remote_File_Reader instance once the request was initialized. ' . 
+ 'Use WP_Remote_File_Ranged_Reader to seek() using range requests instead.', + '1.0.0' + ); return false; } $this->skip_bytes = $offset_in_file; @@ -44,7 +48,8 @@ public function seek( $offset_in_file ): bool { public function next_bytes(): bool { if ( null === $this->request ) { $this->request = new \WordPress\AsyncHttp\Request( - $this->url + $this->url, + array( 'headers' => $this->headers ) ); if ( false === $this->client->enqueue( $this->request ) ) { // TODO: Think through error handling @@ -55,7 +60,28 @@ public function next_bytes(): bool { $this->after_chunk(); while ( $this->client->await_next_event() ) { + $request = $this->client->get_request(); + if ( ! $request ) { + continue; + } + $response = $request->response; + if ( false === $response ) { + continue; + } + if ( $request->redirected_to ) { + continue; + } + switch ( $this->client->get_event() ) { + case \WordPress\AsyncHttp\Client::EVENT_GOT_HEADERS: + if(null !== $this->remote_file_length) { + continue 2; + } + $content_length = $response->get_header( 'Content-Length' ); + if ( false !== $content_length ) { + $this->remote_file_length = (int) $content_length; + } + break; case \WordPress\AsyncHttp\Client::EVENT_BODY_CHUNK_AVAILABLE: $chunk = $this->client->get_response_body_chunk(); if ( ! is_string( $chunk ) ) { @@ -94,6 +120,47 @@ public function next_bytes(): bool { } } + public function length(): ?int { + if ( null !== $this->remote_file_length ) { + return $this->remote_file_length; + } + + $request = new \WordPress\AsyncHttp\Request( + $this->url, + array( 'method' => 'HEAD' ) + ); + if ( false === $this->client->enqueue( $request ) ) { + // TODO: Think through error handling + return false; + } + while ( $this->client->await_next_event() ) { + switch ( $this->client->get_event() ) { + case \WordPress\AsyncHttp\Client::EVENT_GOT_HEADERS: + $request = $this->client->get_request(); + if ( ! 
$request ) { + return false; + } + if($request->redirected_to) { + continue 2; + } + $response = $request->response; + if ( false === $response ) { + return false; + } + $content_length = $response->get_header( 'Content-Length' ); + if ( false === $content_length ) { + return false; + } + $this->remote_file_length = (int) $content_length; + break; + } + } + if(null === $this->remote_file_length) { + return false; + } + return $this->remote_file_length; + } + private function after_chunk() { if ( $this->current_chunk ) { $this->bytes_already_read += strlen( $this->current_chunk ); From c9edbfa106fd928fa442c0ce191561acfb169e47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Mon, 16 Dec 2024 22:34:36 +0100 Subject: [PATCH 3/4] Consider byte steraming errors in WP_Byte_Reader::read_all() --- src/WordPress/ByteReader/WP_Byte_Reader.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/WordPress/ByteReader/WP_Byte_Reader.php b/src/WordPress/ByteReader/WP_Byte_Reader.php index dc47f733..75ee7d88 100644 --- a/src/WordPress/ByteReader/WP_Byte_Reader.php +++ b/src/WordPress/ByteReader/WP_Byte_Reader.php @@ -18,6 +18,9 @@ public function read_all(): string { while( $this->next_bytes() ) { $buffer .= $this->get_bytes(); } + if( $this->get_last_error() ) { + return false; + } return $buffer; } } From b52a93ce17562a1964fb27df770792fe165b217b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Tue, 17 Dec 2024 13:04:54 +0100 Subject: [PATCH 4/4] Revert client changes --- src/WordPress/AsyncHttp/Request.php | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/WordPress/AsyncHttp/Request.php b/src/WordPress/AsyncHttp/Request.php index 78e168ba..ddd21386 100644 --- a/src/WordPress/AsyncHttp/Request.php +++ b/src/WordPress/AsyncHttp/Request.php @@ -62,16 +62,6 @@ public function __construct( string $url, $request_info = array() ) { } } - public function get_request_info() { - return [ - 'http_version' => 
$this->http_version, - 'method' => $this->method, - 'headers' => $this->headers, - 'body_stream' => $this->upload_body_stream, - 'redirected_from' => $this->redirected_from, - ]; - } - public function latest_redirect() { $request = $this; while ( $request->redirected_to ) {