diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml index 6ae9959..780ee7d 100644 --- a/.github/workflows/containers.yml +++ b/.github/workflows/containers.yml @@ -20,7 +20,7 @@ jobs: strategy: matrix: - target: [subfile-exchange] + target: [file-exchange] permissions: packages: write diff --git a/.github/workflows/gen-binaries.yml b/.github/workflows/gen-binaries.yml index 123c0e5..f5257aa 100644 --- a/.github/workflows/gen-binaries.yml +++ b/.github/workflows/gen-binaries.yml @@ -23,8 +23,8 @@ jobs: GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} with: upload_url: ${{ github.event.release.upload_url }} - asset_path: ./target/release/subfile-exchange - asset_name: subfile-exchange-${{ github.event.release.tag_name }}-ubuntu + asset_path: ./target/release/file-exchange + asset_name: file-exchange-${{ github.event.release.tag_name }}-ubuntu asset_content_type: binary/octet-stream build-macos: runs-on: macos-latest @@ -43,6 +43,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} with: upload_url: ${{ github.event.release.upload_url }} - asset_path: ./target/release/subfile-exchange - asset_name: subfile-exchange-${{ github.event.release.tag_name }}-macos + asset_path: ./target/release/file-exchange + asset_name: file-exchange-${{ github.event.release.tag_name }}-macos asset_content_type: binary/octet-stream diff --git a/Cargo.lock b/Cargo.lock index 90ea1e9..ac73a5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1660,6 +1660,100 @@ dependencies = [ "subtle", ] +[[package]] +name = "file-exchange" +version = "0.0.1" +dependencies = [ + "alloy-primitives", + "alloy-sol-types", + "anyhow", + "base64 0.21.5", + "bs58", + "build-info", + "build-info-build", + "bytes", + "cargo-husky", + "chrono", + "clap", + "confy", + "criterion", + "derive-getters", + "dotenv", + "ethers", + "ethers-core", + "futures 0.3.29", + "hdwallet", + "hex", + "http", + "hyper", + "ipfs-api-backend-hyper", + "ipfs-api-prelude", + "merkle-cbt", + "object_store", + "rand", + "reqwest", + "rustls", + "rustls-pemfile", + "secp256k1 0.28.0", + "serde", + "serde_json", + "serde_yaml", + "sha2", + "tap_core", + "tempfile", + "tokio", + "tokio-retry", + "toml 0.7.8", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "file-service" +version = "0.0.1" +dependencies = [ + "alloy-primitives", + "alloy-sol-types", + "anyhow", + "base64 0.21.5", + "build-info", + "build-info-build", + "bytes", + "cargo-husky", + "chrono", + "clap", + "confy", + "criterion", + "derive-getters", + "dotenv", + "ethers", + "ethers-core", + "file-exchange", + "futures 0.3.29", + "hex", + "http", + "hyper", + "ipfs-api-backend-hyper", + "ipfs-api-prelude", + "merkle-cbt", + "rand", + "reqwest", + "rustls", + "rustls-pemfile", + "secp256k1 0.28.0", + "serde", + "serde_json", + "serde_yaml", + "sha2", + "tap_core", + "tempfile", + "tokio", + "tokio-retry", + "toml 0.7.8", + "tracing", + "tracing-subscriber", +] + [[package]] name = "fixed-hash" version = "0.8.0" @@ -4153,100 +4247,6 @@ dependencies = [ "syn 2.0.41", ] -[[package]] -name = "subfile-exchange" -version = "0.0.1" -dependencies = [ - "alloy-primitives", - "alloy-sol-types", - "anyhow", - "base64 0.21.5", - "bs58", - "build-info", - "build-info-build", - "bytes", - "cargo-husky", - "chrono", - "clap", - "confy", - "criterion", - "derive-getters", - "dotenv", - "ethers", - "ethers-core", - "futures 0.3.29", - "hdwallet", - "hex", - "http", - "hyper", - "ipfs-api-backend-hyper", - "ipfs-api-prelude", - "merkle-cbt", - "object_store", - "rand", - "reqwest", - 
"rustls", - "rustls-pemfile", - "secp256k1 0.28.0", - "serde", - "serde_json", - "serde_yaml", - "sha2", - "tap_core", - "tempfile", - "tokio", - "tokio-retry", - "toml 0.7.8", - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "subfile-service" -version = "0.0.1" -dependencies = [ - "alloy-primitives", - "alloy-sol-types", - "anyhow", - "base64 0.21.5", - "build-info", - "build-info-build", - "bytes", - "cargo-husky", - "chrono", - "clap", - "confy", - "criterion", - "derive-getters", - "dotenv", - "ethers", - "ethers-core", - "futures 0.3.29", - "hex", - "http", - "hyper", - "ipfs-api-backend-hyper", - "ipfs-api-prelude", - "merkle-cbt", - "rand", - "reqwest", - "rustls", - "rustls-pemfile", - "secp256k1 0.28.0", - "serde", - "serde_json", - "serde_yaml", - "sha2", - "subfile-exchange", - "tap_core", - "tempfile", - "tokio", - "tokio-retry", - "toml 0.7.8", - "tracing", - "tracing-subscriber", -] - [[package]] name = "subtle" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index e273978..a222eca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [workspace] members = [ - "subfile-exchange", - "subfile-service", + "file-exchange", + "file-service", ] resolver = "2" diff --git a/Dockerfile.subfile-exchange b/Dockerfile.subfile-exchange index 8473b48..9eaf94b 100644 --- a/Dockerfile.subfile-exchange +++ b/Dockerfile.subfile-exchange @@ -5,7 +5,7 @@ COPY . . RUN ls -a -RUN cargo build --release --bin subfile-exchange +RUN cargo build --release --bin file-exchange ######################################################################################## @@ -14,6 +14,6 @@ FROM debian:bookworm-slim RUN apt-get update && apt-get install -y --no-install-recommends \ openssl ca-certificates \ && rm -rf /var/lib/apt/lists/* -COPY --from=build /root/target/release/subfile-exchange /usr/local/bin/subfile-exchange +COPY --from=build /root/target/release/file-exchange /usr/local/bin/file-exchange -ENTRYPOINT [ "/usr/local/bin/subfile-exchange" ] +ENTRYPOINT [ "/usr/local/bin/file-exchange" ] diff --git a/README.md b/README.md index d28df6e..63f91f0 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,17 @@ -# Subfile-exchange +# File Exchange ## Introduction Enable file sharing as a exchange, aim for a decentralized, efficient, and verifiable market, with scalable, performant, and secure software. -Subfile-exchange is a decentralized, peer-to-peer data sharing platform designed for efficient and verifiable file sharing. It leverages a combination of technologies including Hash commitments on IPFS for file discovery and verification, chunk data transfer and micropayments reducing trust requirements between clients and servers, and HTTPS over HTTP2 for secure and efficient data transfer. The system is built with scalability, performance, integrity, and security in mind, aiming to create a robust market for file sharing. +File Exchange is a decentralized, peer-to-peer data sharing platform designed for efficient and verifiable file sharing. It leverages a combination of technologies including Hash commitments on IPFS for file discovery and verification, chunk data transfer and micropayments reducing trust requirements between clients and servers, and HTTPS over HTTP2 for secure and efficient data transfer. The system is built with scalability, performance, integrity, and security in mind, aiming to create a robust market for file sharing. -Subfile-exchange leverages IPFS for file discovery and verification, ensuring that each piece of data shared is authentic and unaltered. 
The use of SHA2-256 for hashing provides a balance of speed and security, making the system both fast and impenetrable to known cryptographic attacks. Furthermore, the adoption of HTTPS over HTTP2 with range requests ensures that all data transfers are not only swift but also secure, safeguarding against common internet vulnerabilities and minimizing risks per transaction. +File Exchange leverages IPFS for file discovery and verification, ensuring that each piece of data shared is authentic and unaltered. The use of SHA2-256 for hashing provides a balance of speed and security, making the system both fast and impenetrable to known cryptographic attacks. Furthermore, the adoption of HTTPS over HTTP2 with range requests ensures that all data transfers are not only swift but also secure, safeguarding against common internet vulnerabilities and minimizing risks per transaction. ## Target Audience -This documentation is tailored for individuals who have a basic understanding of decentralized technologies, peer-to-peer networks, and cryptographic principles. Whether you are an indexer running various blockchain nodes looking for sharing and verifying your data, an indexer looking to launch service for a new chain, or simply a user interested in the world of decentralized file sharing, this guide aims to provide you with a clear and comprehensive understanding of how Subfile-service operates. +This documentation is tailored for individuals who have a basic understanding of decentralized technologies, peer-to-peer networks, and cryptographic principles. Whether you are an indexer running various blockchain nodes looking for sharing and verifying your data, an indexer looking to launch service for a new chain, or simply a user interested in the world of decentralized file sharing, this guide aims to provide you with a clear and comprehensive understanding of how File Service operates. ## Features @@ -49,7 +49,7 @@ You may learn background information on various components of the exchange #### [Design Principle](docs/architecture.md) -#### [Packaging](docs/subfile_manifest.md) +#### [Entity Definition](docs/manifest.md) #### [Contracts](docs/contracts.md) diff --git a/contributing.md b/contributing.md index 2ddc045..c55416c 100644 --- a/contributing.md +++ b/contributing.md @@ -1,7 +1,7 @@ -# Contributing to Subfile-exchange +# Contributing to File Hosting Service -Welcome to the Subfile Exchange! Thanks a ton for your interest in contributing. +Welcome to the File Hosting Service! Thanks a ton for your interest in contributing. If you run into any problems feel free to create an issue. PRs are much appreciated for simple things. If it's something more complex we'd appreciate having a quick chat in GitHub Issues or the Graph Discord server. diff --git a/docs/architecture.md b/docs/architecture.md index 3345fb1..e444b9b 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -2,24 +2,24 @@ ### Decentralized Architecture -Unlike traditional centralized systems where data is stored and managed through a single entity, Subfile-service distributes data across a network of nodes. This decentralization ensures resilience against failures and attacks as there is no single point of failure; it also enhances privacy and control as users are not reliant on a central authority for data management. +Unlike traditional centralized systems where data is stored and managed through a single entity, File Service distributes data across a network of nodes. 
This decentralization ensures resilience against failures and attacks as there is no single point of failure; it also enhances privacy and control as users are not reliant on a central authority for data management. The architecture is underpinned by a peer-to-peer (P2P) network framework, where each node in the network can act as a client and/or a server. This setup facilitates direct file sharing among users without the need for intermediaries, and as more users join and contribute to the network, the system becomes more robust and capable of handling larger volumes of data. ### Generic Diagram -![Diagram](./subfile-exchange.png) +![Diagram](./file-exchange.png) ### Key Components 1. **File Hasher** ensures data integrity. It uses the SHA2-256 hashing algorithm to process files. The hasher chunks files into manageable sizes (currently 1MB), hashes each chunk, and then organizes these hashes into a Merkle tree structure (Currently we are using an ordered list, but should be relatively simple to update to use the tree structure as it optimizes the verification process ($O(n)$ versus $O(log(n)$ for a single chunk verification where $n$ is the number of chunks), but require 2x memory usage for the hash data structure). -2. **Subfile Publisher** is responsible for preparing and publishing files onto the network. It takes files, processes them through the File Hasher to generate a chunk_file.yaml containing chunk hashes, and then publishes this data to IPFS. The Subfile Builder/Publisher also constructs a subfile manifest, which contains metadata and other relevant information about the files. +2. **Manifest Publisher** is responsible for preparing and publishing files onto the network. It takes files, processes them through the File Hasher to generate a file_manifest.yaml containing chunk hashes, and then publishes this data to IPFS. The Manifest Builder/Publisher also constructs a file/bundle manifest, which contains metadata and other relevant information about the files. 3. **IPFS Client** connects to the IPFS network as it is used for posting files to the network and retrieving them. IPFS plays a crucial role in discovering and verifying files, as it allows for content-addressable storage. -4. **Subfile Server** requires an operator mnemonic for initialization and handles various tasks such as retrieving files from IPFS, managing subfiles services, verifying file integrity against chunk hashes, and managing API endpoints. The server also implements routes for various functionalities like health checks, version information, and file availability. +4. **File Server** requires an operator mnemonic for initialization and handles various tasks such as retrieving files from IPFS, managing file services, verifying file integrity against chunk hashes, and managing API endpoints. The server also implements routes for various functionalities like health checks, version information, and file availability. -5. **Subfile Client** is used to request and receive files. It handles the construction of requests, including the addition of authentication tokens and, in future iterations, will manage budgeting for file downloads. The client is responsible for ensuring that the received files are complete and authentic by verifying each chunk against the hashes provided by the File Hasher. +5. **File Downloader** is used to request and receive files. It handles the construction of requests, including the addition of authentication tokens and, in future iterations, will manage budgeting for file downloads. 
The client is responsible for ensuring that the received files are complete and authentic by verifying each chunk against the hashes provided by the File Hasher.
diff --git a/docs/client_guide.md b/docs/client_guide.md
index c66d105..dd75e19 100644
--- a/docs/client_guide.md
+++ b/docs/client_guide.md
@@ -19,13 +19,13 @@ To minimize trust requirements, the client employs a chunk-based payment system.
## CLI Usage
-The client operates through a command-line interface (CLI) for simplicity and ease of use. Client would need to determine the subfile that contains the dataset they desire. This may mean looking at subfile manifests or in the future a tool that matches subfiles by the provided critieria.
+The client operates through a command-line interface (CLI) for simplicity and ease of use. The client needs to determine the Bundle that contains the desired dataset. This may mean looking at Bundle manifests or, in the future, using a tool that matches manifests by the provided criteria.
-After determining the subfile CID, client should supply a local path for writing the subfile corresponding files, a wallet for payments or a free query auth token, and a list of indexer endpoints (this should be handled by gateway or a scraping client).
+After determining the Bundle CID, the client should supply a local path for writing the files contained in the Bundle, a wallet for payments or a free query auth token, and a list of indexer endpoints (this should be handled by a gateway or a scraping client).
### CLI example
```
-➜ subfile-exchange git:(main) ✗ cargo run -p subfile-exchange downloader \
+➜ file-exchange git:(main) ✗ cargo run -p file-exchange downloader \
  --ipfs-hash QmHash \
  --indexer-endpoints http://localhost:5678,http://localhost:5677 \
  --free-query-auth-token 'Bearer auth_token' \
@@ -49,7 +49,7 @@ To use the client effectively, you will need:
### Getting Started
1. Download and install the source code.
-2. Gather configurations: Identify the CID of the desired subfile, registered indexer endpoints, a local path for storing the downloaded files, private key (or mnemonics) of a wallet valid for Escrow payments, (optional) Obtain a free query auth token for limited access.
+2. Gather configurations: Identify the CID of the desired Bundle, registered indexer endpoints, a local path for storing the downloaded files, the private key (or mnemonic) of a wallet valid for Escrow payments, and (optionally) a free query auth token for limited access.
3. Use the CLI commands to download files.
Enjoy seamless access to a vast world of data!
@@ -58,9 +58,9 @@
The client prioritizes user safety and security. It employs secure communication protocols and wallet management practices. However, users should always be mindful of potential risks:
-- Choosing subfiles: Verify correctness of the subfile before initiating file requests.
+- Choosing manifests: Verify the correctness of the Bundle before initiating file requests.
- Securing wallet: Implement strong key protection and other security measures for the wallet.
-- Staying informed: Updated on the latest security threats, invalid subfiles, and updates for the client software.
+- Staying informed: Stay updated on the latest security threats, invalid manifests, and updates for the client software.
### Join the Community
diff --git a/docs/discovery.md b/docs/discovery.md
index 2164bf2..5dc74f8 100644
--- a/docs/discovery.md
+++ b/docs/discovery.md
@@ -4,33 +4,33 @@ With various packaging methods, types of files, and other types of combinations,
## Off-chain approach
-Indexers serve `/status` endpoint that provides a list of Subfile IPFS hashes, representing the list of available subfiles the local indexer is serving. This is sufficient for matching specific subfiles, but no matching for specific files.
+Indexers serve a `/status` endpoint that provides a list of Manifest IPFS hashes, representing the bundles the local indexer is serving. This is sufficient for matching specific bundle manifests; later on we will allow matching for single file manifests.
-On the subfile level, the discovery is relatively straightforward for the client, given that they have choosen a subfile IPFS hash to download (`target_subfile`).
+On both the **Bundle** and **File** levels, discovery is relatively straightforward for the client, given that they have chosen a Manifest IPFS hash to download (`target_manifest`).
1. Client provide a status list of `indexer_endpoints`. Later on, we can add an automatic mode of grabbing all registered indexer service url from the registery contract.
-2. Client pings `/operator` and `/status` endpoint for all indexer endpoints. `/operator` will provide the indexer operator and `/status` endpoint will provide the indexer's available subfiles.
+2. Client pings the `/operator` and `/status` endpoints for all indexer endpoints. `/operator` will provide the indexer operator and `/status` will provide the indexer's available manifests.
- a. if `target_subfile` is in the available subfiles, collect indexer operator and endpoint as an available service
+ a. If `target_manifest` is in the available manifests, collect the indexer operator and endpoint as an available service
3. Collect a list of available services. Returns early if the list is empty.
-We further consider matching files across subfiles so that consumers can be prompted with alternatives if the `target_subfile` is unavailable. This increases file availability by decreasing the criteria for matching a particular service.
+If discovery is matching at the `Bundle` level, we further consider matching files across bundle manifests so that consumers can be prompted with alternatives if the `target_manifest` is unavailable. This increases file availability by decreasing the criteria for matching a bundle.
-Imagine a server serving $subfile_a = {file_x, file_y, file_z}$. Client requests $subfile_b = {file_x}$. The subfile level check will determine that $subfile_a\neq subfile_b$. We add an additional check to resolve $subfile_a$ and $subfile_b$ to chunk file hashes for matching.
+Imagine a server serving $bundle_a = \{file_x, file_y, file_z\}$. The client requests $bundle_b = \{file_x\}$. The Bundle IPFS hash check will determine that $bundle_a \neq bundle_b$. We add an additional check to resolve $bundle_a$ and $bundle_b$ to file manifest hashes for matching.
-1. Query the content of `target_subfile` for its vector of chunk file hashes
+1. Query the content of `target_bundle` for its vector of file manifest hashes
-2. Query the content of subfiles served by indexers, create a nested map of indexer to subfiles to files.
+2. Query the content of bundles served by indexers, create a nested map of indexer to bundles to files: `Map>`.
-3. For each target file, check if there is an indexer serving a subfile that contains the target file. Record the indexer and subfile hash, indexed by the file hash.
+3. For each `target_bundle` file manifest, check if there is an indexer serving a bundle that contains the target file. Record the indexer and bundle hash, indexed by the file hash.
-4. if there is a target file that unavailable at any indexer/subfile, immediately return unavailability as the target subfile cannot be completed.
+4. If there is a target file unavailable from all indexers, immediately return unavailability, as the `target_manifest` cannot be completed.
-5. return the recorded map of file to queriable indexer_endpoint and subfile hash
+5. Return the recorded map of file to queryable `indexer_endpoint` and manifest hash for user evaluation.
-Later on, we may generate a summary of which subfile has the highest percentage of compatibility. The further automated approach will consist of client taking the recorded availability map and construct range download requests based on the corresponding indexer_endpoint, server subfile, and file hash.
+Later on, we may generate a summary of which manifest has the highest percentage of compatibility. A further automated approach will consist of the client taking the recorded availability map and constructing range download requests based on the corresponding indexer_endpoint, server manifest, and file hash.
In the diagram below, keep in mind that it is possible for IPFS files (schema files) to be hosted by indexer services as well, which will remove the necessity of using an IPFS gateway. However, for the sake of simplicity and accuracy to the current state of the project, we keep the IPFS gateway component required.
@@ -39,7 +39,7 @@ graph LR
I[Indexer] -->|post schema| IPFS[IPFS Gateway]
I -->|post schema
manage availability| I C[Client] -.->|schema| IPFS - C -->|determine subfile hash| C + C -->|determine Bundle hash| C C -.->|schema| I C -.->|availability| I C -->|paid query| I @@ -52,7 +52,7 @@ graph LR I[Indexer] -->|post schema| IPFS[IPFS Gateway] E[Explorer] -.->|availability| I E -.->|query scehma| IPFS - C[Client] -.->|select subfile| E + C[Client] -.->|select Bundle| E C -->|authorize| E E -->|paid query| I E -->|respond| C @@ -65,18 +65,18 @@ graph LR Indexers registers their server url at the registry contract, as this step is crucial in making the Indexer's service discoverable and accessible within the network. We assume Indexer has already registered (through indexer-agent). -Indexers are expected to create explicit allocation against a specific IPFS hash. The hashes uniquely identify subfiles, acting as a unit identifiable and verifiable between the Indexers and the data requested by consumers. This process ensures that data retrieval is efficiently managed and that Indexers are appropriately allocated to serve specific data needs. +Indexers are expected to create explicit allocation against a specific IPFS hash. The hashes uniquely identify bundles, acting as a unit identifiable and verifiable between the Indexers and the data requested by consumers. This process ensures that data retrieval is efficiently managed and that Indexers are appropriately allocated to serve specific data needs. The network subgraph keeps track of all registered Indexers and their active (and closed) allocations. We assume an update to the network subgraph such that `SubgraphDeployment` field gets renamed or encasuplated to a more generic entity such as `DataServiceDeployment` with an additional `dataService` enum field. This addition is essential for querying and filtering information about deployments. -- Identify Available Subfiles: Clients can view all subfiles currently available in the network, along with the Indexers allocated to these subfiles. -- Query Specific Subfiles: Once a desired subfile is identified, clients can make targeted queries pertaining to that subfile and the Indexers actively allocated to it. +- Identify Available Bundles: Clients can view all bundles currently available in the network, along with the Indexers allocated to these bundles. +- Query Specific Bundles: Once a desired Bundle is identified, clients can make targeted queries pertaining to that Bundle and the Indexers actively allocated to it. With the updated network subgraph, on-chain discovery can be done with flexible queries. ```graphql query { -// Discover through subfile IPFS hash +// Discover through Bundle IPFS hash subgraphDeployments(where:{ipfsHash: $deployment_ipfs_hash}){ id ipfsHash @@ -91,7 +91,7 @@ subgraphDeployments(where:{ipfsHash: $deployment_ipfs_hash}){ // Discover through indexers indexers { allocations(where: { - dataServiceType: Subfile, + dataServiceType: File, fileDeployment: $deployment_ipfs_hash }) { id @@ -110,9 +110,9 @@ indexers { **Off-chain approach** -Clients can discover all the available subfiles through the network subgraph, and the allocated indexers. They are responsible for identifying the desired subfiles and making query specific to the subfile and the actively allocated indexers. To gain insights to an IPFS hash, the client might query the IPFS file content to read subfile descriptions and chunk file hashes. +Clients can discover all the available bundles through the network subgraph, and the allocated indexers. 
They are responsible for identifying the desired bundles and making query specific to the Bundle and the actively allocated indexers. To gain insights to an IPFS hash, the client might query the IPFS file content to read Bundle descriptions and file manifest hashes. -A client may want to resolve all the available subfile manifest to discover the best fit for their interest, or a client may decide to download a specific file instead of all the files contained in a subfile. Discovery can be made through specific indexer service endpoints or IPFS gateways. +A client may want to resolve all the available Bundle manifest to discover the best fit for their interest, or a client may decide to download a specific file instead of all the files contained in a Bundle. Discovery can be made through specific indexer service endpoints or IPFS gateways. ```mermaid graph TD @@ -125,7 +125,7 @@ graph TD I[Indexer] -->|post schema| IPFS[IPFS Gateway] E[Explorer] -.->|availability| NS E -.->|query scehma| IPFS - C[Client] -.->|select subfile| E + C[Client] -.->|select Bundle| E C -->|paid query| E E -->|routed paid query| I C -->|direct paid Query| I diff --git a/docs/feature_checklist.md b/docs/feature_checklist.md index 7149aeb..c728a46 100644 --- a/docs/feature_checklist.md +++ b/docs/feature_checklist.md @@ -4,49 +4,49 @@ - [x] File hasher - [x] use sha2-256 as it is more commonly used, faster than sha3-256, both no known hacks (should be easy to switch) - [x] Takes a file path and read - - [x] Chunk file to a certain size - currently using a constant of 1MB + - [x] File manifest to a certain size - currently using a constant of 1MB - [X] Hash each chunk as leaves (nodes) - [x] Produce a merkle tree - - [x] construct and write a chunk_file.yaml (root, nodes) + - [x] construct and write a file_manifest.yaml (root, nodes) - [x] Unit tests: same file same hash, different file different hash, big temp file same/modified - [x] last chunk lengths, - [ ] Analyze merkle tree vs hash list for memory usage and verification runtime -- [x] Subfile builder / publisher - CLI - - [x] Take a file, use File hasher to get the chunk_file, publish chunk_file to IPFS +- [x] Manifest builder / publisher - CLI + - [x] Take a file, use File hasher to get the file_manifest, publish file_manifest to IPFS - [x] later, take a list of files, use File hasher to hash all files and get root hashes - - [x] Construct a subfile manifest with metainfo using YAML builder + - [x] Construct a manifest with metainfo using YAML builder - [x] vectorize - [x] May include a status endpoint for the "canonical" publisher, but recognize the endpoint may change later on - - [x] Publish subfile to IPFS, receive a IPFS hash for the subfile + - [x] Publish manifest to IPFS, receive a IPFS hash for the manifest - [x] IPFS client - [x] Connect to an IPFS gateway - [x] Post files - [x] Cat files - [x] YAML parser and builder - [x] Deserialize and serialize yaml files -- [ ] Subfile server +- [ ] Manifest server - [x] require operator mnemonic - [ ] Use a generic path - - [x] Initialize service; for one subfile, take (ipfs_hash, local_path) - - [x] Take a subfile IPFS hash and get the file using IPFS client - - [x] Parse yaml file for all the chunk_file hashes using Yaml parser, construct the subfile object - - [x] Take metainfo of chunk_file and search for access by the local_path + - [x] Initialize service; for one Bundle, take (ipfs_hash, local_path) + - [x] Take a Bundle IPFS hash and get the file using IPFS client + - [x] Parse yaml file for all the 
file_manifest hashes using Yaml parser, construct the Bundle object + - [x] Take metainfo of file_manifest and search for access by the local_path - [x] Verify local file against the chunk hashes - - [x] vectorize service for multiple subfiles + - [x] vectorize service for multiple bundles - [x] Once verified, add to file to the service availability endpoint - [x] Route `/` for "Ready to roll!" - [x] Route `/operator` for operator info - [x] Route `/status` for availability - [x] verification for availability - - [x] Route `/subfiles/id/:id` for a subfile using IPFS hash with range requests + - [x] Route `/bundles/id/:id` for a Bundle using IPFS hash with range requests - [x] Route `/health` for general health - - [x] Route `/version` for subfile server version + - [x] Route `/version` for Bundle server version - [x] Configure and check free query auth token - [ ] (?) Server Certificate - [ ] Upon receiving a service request (ipfs_hash, range, receipt) - [x] start off with request as (ipfs_hash, range) - [x] Check if ipfs_hash is available - - [x] Check if range is valid against the subfile and the specific chunk_file + - [x] Check if range is valid against the Bundle and the specific file_manifest - [ ] TAP: Valid and store receipt - [x] Read in the requested chunk - [x] Add tests @@ -55,15 +55,15 @@ - [x] Start with free service and requiring a free query auth token - [x] default pricing, allow updates for pricing per byte - [ ] Runs TAP agent for receipt management -- [ ] Subfile Client +- [ ] File Download Client - [ ] Take private key/mneomic for wallet connections - [x] Request using ipfs_hash - - [ ] take budget for the overall subfile + - [ ] take budget for the overall bundle/file - [ ] construct receipts using budget and chunk sizes - [ ] add receipt to request - [x] add free_token to request - [ ] File discovery and matching (Gateway?) 
- - [x] Read subfile manifest
+ - [x] Read bundle manifest
- [x] Ping indexer endpoints data availability
- [ ] Pricing and performances, run indexer selection
- [x] Parallel requests
@@ -74,7 +74,7 @@
- [x] Wait for the responses (For now, assume that the response chunks correspond with the verifiable chunks)
- [x] Keeps track of the downloaded and missing pieces,
- [x] continually requesting missing pieces until the complete file is obtained
- - [x] Upon receiving a response, verify the chunk data in the chunk_file
+ - [x] Upon receiving a response, verify the chunk data in the file_manifest
- [x] if failed, blacklist the indexer
- [x] Once all file has been received and verified, terminate
diff --git a/docs/subfile_manifest.md b/docs/manifest.md
similarity index 82%
rename from docs/subfile_manifest.md
rename to docs/manifest.md
index 3786c85..15e7c24 100644
--- a/docs/subfile_manifest.md
+++ b/docs/manifest.md
@@ -1,27 +1,27 @@
-## Subfile manfiest specifications
+## Manifest specifications
-Structure of subfile and chunk files
+Structure of Bundle and File Manifests
```mermaid
erDiagram
- Publisher ||..|{ ChunkFile : publishes
- ChunkFile ||--|| FileMetaInfo : hash
+ Publisher ||..|{ FileManifest : publishes
+ FileManifest ||--|| FileMetaInfo : hash
FileMetaInfo }|--|| SupFile : belongs_to
Publisher ||..|| SupFile : publishes
Server }|..|| SupFile : host
- Server }|--|{ ChunkFile : resolve
+ Server }|--|{ FileManifest : resolve
Client }|..|| SupFile : discover
Client }|..|| Server : request
- Client }|--|{ ChunkFile : validate
+ Client }|--|{ FileManifest : validate
- ChunkFile {
+ FileManifest {
u64 total_bytes
u64 chunk_size
VecString chunk_hashes
}
Publisher {
String read_dir
- String subfile_name
+ String bundle_name
VecString file_names
String file_type
String file_version
@@ -49,7 +49,7 @@ erDiagram
Server {
String host
usize port
- VecString subfiles
+ VecString bundles
OptionString free_query_auth_token
OptionString admin_auth_token
String mnemonic
@@ -65,11 +65,11 @@ erDiagram
```
-A file will have the same Chunk file CID if they share the same content, chunked by the same size, and with the same hashing scheme; the file name and publisher properties will not affect the chunk file CID.
+A file will have the same File manifest CID as another file if they share the same content, are chunked by the same size, and use the same hashing scheme; the file name and publisher properties will not affect the file manifest CID.
-The CID for the subfile can vary based on the makeup of the files and meta information about the set of the files.
+The CID for the Bundle can vary based on the makeup of the files and meta information about the set of the files.
-While servers and clients can simply exchange a published subfile by the exact files contained, we expect the possibility to match availability on a chunk file CID level, so the server serving a subfile with overlapping set of files with the target subfile can still provide for the overlapping content.
+While servers and clients can simply exchange a published bundle by the exact files contained, we expect the possibility to match availability at the file manifest CID level, so a server serving a bundle whose files overlap with those of the target bundle can still provide the overlapping content.
### Verification options @@ -105,7 +105,7 @@ Depending on the package sizes and client requirements, different validation met ### Current manifest -#### Subfile manifest +#### Bundle manifest https://ipfs.network.thegraph.com/api/v0/cat?arg=QmeaPp764FjQjPB66M9ijmQKmLhwBpHQhA7dEbH2FA1j3v ``` @@ -125,7 +125,7 @@ block_range: end_block: null ``` -#### Chunk file schema +#### File manifest schema https://ipfs.network.thegraph.com/api/v0/cat?arg=QmeE38uPSqT5XuHfM8X2JZAYgDCEwmDyMYULmZaRnNqPCj ``` diff --git a/docs/onchain_guide.md b/docs/onchain_guide.md index 602d03b..0da9901 100644 --- a/docs/onchain_guide.md +++ b/docs/onchain_guide.md @@ -28,7 +28,7 @@ To use the Wallet CLI, the following subcommands and options are available: To open allocation towards a deployment, provide the deployment IPFS hash, the token amount, the current epoch number (should later be resolved automaically), and fill in the `allocate` subcommand with the necessary arguments: ```shell -✗ subfile-exchange wallet \ +✗ file-exchange wallet \ --mnemonic \ --provider \ allocate \ @@ -51,7 +51,7 @@ To open allocation towards a deployment, provide the deployment IPFS hash, the t Grab the IPFS hash of the deployment you want to allocate to and decide the allocation amount. Note that you can only open 1 allocation per deployment per epoch. ```shell -✗ cargo run -p subfile-exchange wallet \ +✗ cargo run -p file-exchange wallet \ --mnemonic "mnemonic phrase" \ --provider "http://localhost:8545" \ allocate \ @@ -62,25 +62,25 @@ Grab the IPFS hash of the deployment you want to allocate to and decide the allo With RUST_LOG turned on, you can expect the following logs upon success ``` - INFO subfile_exchange::transaction_manager: Initialize transaction manager - at subfile-exchange/src/transaction_manager/mod.rs:32 + INFO file_exchange::transaction_manager: Initialize transaction manager + at file-exchange/src/transaction_manager/mod.rs:32 - INFO subfile_exchange::transaction_manager::staking: allocate params, dep_bytes: [241, 64, 71, 78, 218, 63, 159, 91, 130, 173, 178, 168, 30, 254, 183, 20, 225, 131, 35, 230, 52, 85, 74, 196, 40, 255, 173, 61, 144, 126, 223, 33], tokens: Some(Allocate(AllocateArgs { tokens: 256, deployment_ipfs: "QmeaPp764FjQjPB66M9ijmQKmLhwBpHQhA7dEbH2FA1j3v", + INFO file_exchange::transaction_manager::staking: allocate params, dep_bytes: [241, 64, 71, 78, 218, 63, 159, 91, 130, 173, 178, 168, 30, 254, 183, 20, 225, 131, 35, 230, 52, 85, 74, 196, 40, 255, 173, 61, 144, 126, 223, 33], tokens: Some(Allocate(AllocateArgs { tokens: 256, deployment_ipfs: "QmeaPp764FjQjPB66M9ijmQKmLhwBpHQhA7dEbH2FA1j3v", epoch: 101 })), allocation_id: 0x75e11e0f2319913c28d0b1916b4b1d9a1ac3977b, metadata: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], proof: Bytes(0xac1b36d68ea95ebe9f9793850cf083a031d40806121dc2dac525423c50611d18053f195627f6bffe036b9325b4dfd273959457b5d3f1c1b53095c096182756bb1b) - at subfile-exchange/src/transaction_manager/staking.rs:67 + at file-exchange/src/transaction_manager/staking.rs:67 - DEBUG subfile_exchange::transaction_manager::staking: estimate gas, estimated_gas: 379109 - at subfile-exchange/src/transaction_manager/staking.rs:82 + DEBUG file_exchange::transaction_manager::staking: estimate gas, estimated_gas: 379109 + at file-exchange/src/transaction_manager/staking.rs:82 - INFO subfile_exchange: Allocation transaction finished, allocation_id: 0x75e11e0f2319913c28d0b1916b4b1d9a1ac3977b, tx_receipt: Some(TransactionReceipt { transaction_hash: 
0x835b790326abf1555545920265e54d5bfbaba150aef31820529736e6727c7a0a, ... }) - at subfile-exchange/src/main.rs:75 + INFO file_exchange: Allocation transaction finished, allocation_id: 0x75e11e0f2319913c28d0b1916b4b1d9a1ac3977b, tx_receipt: Some(TransactionReceipt { transaction_hash: 0x835b790326abf1555545920265e54d5bfbaba150aef31820529736e6727c7a0a, ... }) + at file-exchange/src/main.rs:75 ``` **Closing allocation** Grab the ID of allocation you want to close, and populate the unallocate subcommand ``` -✗ cargo run -p subfile-exchange wallet \ +✗ cargo run -p file-exchange wallet \ --mnemonic "mnemonic" \ --provider "provider_url" \ unallocate --allocation-id 0xe37b9ee6d657ab5700e8a964a8fcc8b39cdefd73 @@ -88,15 +88,15 @@ Grab the ID of allocation you want to close, and populate the unallocate subcomm You can expect logs as follows ``` - INFO subfile_exchange::transaction_manager: Initialize transaction manager - at subfile-exchange/src/transaction_manager/mod.rs:32 + INFO file_exchange::transaction_manager: Initialize transaction manager + at file-exchange/src/transaction_manager/mod.rs:32 - INFO subfile_exchange::transaction_manager::staking: unallocate params, allocation_id: 0xe37b9ee6d657ab5700e8a964a8fcc8b39cdefd73 - at subfile-exchange/src/transaction_manager/staking.rs:142 + INFO file_exchange::transaction_manager::staking: unallocate params, allocation_id: 0xe37b9ee6d657ab5700e8a964a8fcc8b39cdefd73 + at file-exchange/src/transaction_manager/staking.rs:142 - DEBUG subfile_exchange::transaction_manager::staking: estimate gas, estimated_gas: 390965 - at subfile-exchange/src/transaction_manager/staking.rs:154 + DEBUG file_exchange::transaction_manager::staking: estimate gas, estimated_gas: 390965 + at file-exchange/src/transaction_manager/staking.rs:154 - INFO subfile_exchange: Transaction result, result: Ok((0xe37b9ee6d657ab5700e8a964a8fcc8b39cdefd73, Some(TransactionReceipt { transaction_hash: 0xd5c7c4d3dbd4aa8f845f87f8225aef91e927fe7cd5a1cd02085b0d30a59f4743, transaction_index: 1, block_hash: Some(0xcb46a88b2a37648a38165ca3740248b9a2a41e01f3b56f65f59b33f5cbf00fd0), block_number: Some(5738566), from: 0xe9a1cabd57700b17945fd81feefba82340d9568f, to: Some(0x865365c425f3a593ffe698d9c4e6707d14d51e08), cumulative_gas_used: 345329, gas_used: Some(345329), contract_address: None, logs: [...], status: Some(1), root: None, logs_bloom: ..., transaction_type: Some(2), effective_gas_price: Some(100000000), other: OtherFields { inner: {"gasUsedForL1": String("0x28a70"), "l1BlockNumber": String("0x4d09a3")} } }))) - at subfile-exchange/src/main.rs:88 + INFO file_exchange: Transaction result, result: Ok((0xe37b9ee6d657ab5700e8a964a8fcc8b39cdefd73, Some(TransactionReceipt { transaction_hash: 0xd5c7c4d3dbd4aa8f845f87f8225aef91e927fe7cd5a1cd02085b0d30a59f4743, transaction_index: 1, block_hash: Some(0xcb46a88b2a37648a38165ca3740248b9a2a41e01f3b56f65f59b33f5cbf00fd0), block_number: Some(5738566), from: 0xe9a1cabd57700b17945fd81feefba82340d9568f, to: Some(0x865365c425f3a593ffe698d9c4e6707d14d51e08), cumulative_gas_used: 345329, gas_used: Some(345329), contract_address: None, logs: [...], status: Some(1), root: None, logs_bloom: ..., transaction_type: Some(2), effective_gas_price: Some(100000000), other: OtherFields { inner: {"gasUsedForL1": String("0x28a70"), "l1BlockNumber": String("0x4d09a3")} } }))) + at file-exchange/src/main.rs:88 ``` diff --git a/docs/publisher_guide.md b/docs/publisher_guide.md index 5aa1da7..995e0fc 100644 --- a/docs/publisher_guide.md +++ b/docs/publisher_guide.md @@ -2,22 +2,22 @@ 
This documentation provides a quick guide to publish verification files for indexed data files on IPFS.
-## To publish a subfile
+## To publish a Bundle
To start, you would need to provide several configurations
### Requirements
-Publisher must have read access to all files contained in the package. The publisher publish 1 subfile at a time and is not responsible for hosting the file after publishing. The publisher should chunk all the files in the package and generate a hash for all the chunks. Then the publisher will build a hierarchy with the hashes. Currently, publisher simply put chunk hashes in a list for each individual files, publish individual chunk files, then they build a subfile that contains a list of the chunk file addresses.
+The publisher must have read access to all files contained in the package. The publisher publishes one Bundle at a time and is not responsible for hosting the files after publishing. The publisher should chunk all the files in the package and generate a hash for all the chunks, then build a hierarchy with the hashes. Currently, the publisher simply puts the chunk hashes in a list for each individual file, publishes the individual file manifests, and then builds a Bundle that contains a list of the file manifest addresses.
> More exploration for hashing/packaging architecture
### CLI example
```
-➜ subfile-exchange git:(main) ✗ cargo run -p subfile-exchange publisher \
+➜ file-exchange git:(main) ✗ cargo run -p file-exchange publisher \
  --read-dir ./example-file/ \
-  --subfile-name "blah" \
+  --bundle-name "blah" \
  --file-names example0017686312.dbin,example-create-17686085.dbin \
  --file-type flatfiles \
  --file-version 0.0.0 \
@@ -27,5 +27,5 @@ Publisher must have read access to all files contained in the package. The publi
For more information
```
-➜ subfile-exchange git:(main) ✗ cargo run -p subfile-exchange --help
+➜ file-exchange git:(main) ✗ cargo run -p file-exchange --help
```
diff --git a/docs/server_guide.md b/docs/server_guide.md
index 3dea9bb..88da97f 100644
--- a/docs/server_guide.md
+++ b/docs/server_guide.md
@@ -53,39 +53,39 @@ The server utilizes a combination of open-source technologies for optimal perfor
CLI example
```
-✗ cargo run -p subfile-exchange server \
+✗ cargo run -p file-exchange server \
  --host 0.0.0.0 \
  --port 5678 \
  --mnemonic "seed phrase" \
  --admin-auth-token "imadmin" \
  --free-query-auth-token "imafriend" \
-  --subfiles "QmHash00:./example-file/,QmHash01:SUBFILE_PATH"
+  --bundles "QmHash00:./example-file/,QmHash01:BUNDLE_PATH"
```
-Run `cargo run -p subfile-exchange --help` for more configurations and the corresponding ENV variable names.
+Run `cargo run -p file-exchange --help` for more configurations and the corresponding ENV variable names.
3. Access the server via the **admin** endpoint.
-HTTP request example to get, add, and remove subfile services +HTTP request example to get, add, and remove file services ``` ✗ curl http://localhost:5678/admin -X POST \ -H "Content-Type: application/json" \ -H "AUTHORIZATION: Bearer imadmin" \ - --data '{"method":"add_subfile","params":["QmUqx9seQqAuCRi3uEPfa1rcS61rKhM7JxtraL81jvY6dZ:./example-file"],"id":1,"jsonrpc":"2.0"}' -Subfile(s) added successfully% + --data '{"method":"add_bundle","params":["QmUqx9seQqAuCRi3uEPfa1rcS61rKhM7JxtraL81jvY6dZ:./example-file"],"id":1,"jsonrpc":"2.0"}' +Manifest(s) added successfully% ✗ curl http://localhost:5678/admin -X POST \ -H "Content-Type: application/json" \ -H "AUTHORIZATION: Bearer imadmin" \ - --data '{"method":"get_subfiles","id":1,"jsonrpc":"2.0"}' + --data '{"method":"get_bundles","id":1,"jsonrpc":"2.0"}' [{ - "ipfs_hash":"QmUqx9seQqAuCRi3uEPfa1rcS61rKhM7JxtraL81jvY6dZ","subfile":{"chunk_files":[{"chunk_hashes":["uKD2xdfp1WszuvIP1nFNNTYoZ7zvm2KX6KFElwwBfdI=","TrusR0Z+EYg33o4KRXGvSN910yavCkjD7K3pYImGZaQ="],"chunk_size":1048576,"file_name":"example-create-17686085.dbin","total_bytes":1052737},{"chunk_hashes":["/5jJskCMgWAZIZHWBWcwnaLP8Ax4sOzCq6d9+k2ouE8=",...],"chunk_size":1048576,"file_name":"0017234500.dbin.zst","total_bytes":24817953},...], + "ipfs_hash":"QmUqx9seQqAuCRi3uEPfa1rcS61rKhM7JxtraL81jvY6dZ","bundle":{"file_manifests":[{"chunk_hashes":["uKD2xdfp1WszuvIP1nFNNTYoZ7zvm2KX6KFElwwBfdI=","TrusR0Z+EYg33o4KRXGvSN910yavCkjD7K3pYImGZaQ="],"chunk_size":1048576,"file_name":"example-create-17686085.dbin","total_bytes":1052737},{"chunk_hashes":["/5jJskCMgWAZIZHWBWcwnaLP8Ax4sOzCq6d9+k2ouE8=",...],"chunk_size":1048576,"file_name":"0017234500.dbin.zst","total_bytes":24817953},...], "ipfs_hash":"QmUqx9seQqAuCRi3uEPfa1rcS61rKhM7JxtraL81jvY6dZ","local_path":"./example-file","manifest":{"block_range":{"end_block":null,"start_block":null},"chain_id":"0","description":"random flatfiles","file_type":"flatfiles","files":[{"hash":"QmSgzLLsQzdRAQRA2d7X3wqLEUTBLSbRe2tqv9rJBy7Wqv","name":"example-create-17686085.dbin"}, ...],"spec_version":"0.0.0"}}}, ...]% ✗ curl http://localhost:5678/admin -X POST \ -H "Content-Type: application/json" \ -H "AUTHORIZATION: Bearer imadmin" \ - --data '{"method":"remove_subfile","params":["QmUqx9seQqAuCRi3uEPfa1rcS61rKhM7JxtraL81jvY6dZ"],"id":1,"jsonrpc":"2.0"}' -Subfile(s) removed successfully + --data '{"method":"remove_bundle","params":["QmUqx9seQqAuCRi3uEPfa1rcS61rKhM7JxtraL81jvY6dZ"],"id":1,"jsonrpc":"2.0"}' +Manifest(s) removed successfully ``` 4. (TODO) Register the server endpoint on the smart contract. Currently we assume the service endpoint has been registered with indexer-agent (for subgraphs). 
diff --git a/subfile-exchange/Cargo.toml b/file-exchange/Cargo.toml similarity index 88% rename from subfile-exchange/Cargo.toml rename to file-exchange/Cargo.toml index 104a495..3812db9 100644 --- a/subfile-exchange/Cargo.toml +++ b/file-exchange/Cargo.toml @@ -1,15 +1,17 @@ [package] -name = "subfile-exchange" +name = "file-exchange" version = "0.0.1" edition = "2021" authors = ["hopeyen "] rust-version = "1.72" -description = "Subfile data service - file exchange" +description = "File Hosting Service" readme = "README.md" license = "Apache-2.0" +keywords = ["https", "file"] +categories = ["command-line-interface", "filesystem", "web-programming::http-server", "web-programming::http-client"] [[bin]] -name = "subfile-exchange" +name = "file-exchange" path = "src/main.rs" [dependencies] @@ -80,11 +82,11 @@ name = "read_and_validate_file" harness = false [[bench]] -name = "validate_local_subfile" +name = "validate_local" harness = false [[bench]] -name = "new_chunk_file" +name = "new_file_manifest" harness = false [[bench]] diff --git a/file-exchange/benches/new_file_manifest.rs b/file-exchange/benches/new_file_manifest.rs new file mode 100644 index 0000000..8847c2d --- /dev/null +++ b/file-exchange/benches/new_file_manifest.rs @@ -0,0 +1,35 @@ +use criterion::async_executor::FuturesExecutor; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use file_exchange::{ + manifest::{local_file_system::Store, FileManifest}, + test_util::CHUNK_SIZE, +}; +fn new_file_manifest_benchmark_file_store(c: &mut Criterion) { + // FileManifest::new(&self.config.read_dir, file_name, self.config.chunk_size) + let read_dir = black_box("../example-file"); + let file_name = black_box("0017234600.dbin.zst"); + let file_size = black_box(CHUNK_SIZE); + + c.bench_function("new_file_manifest_benchmark_file_store", |b| { + b.iter(|| FileManifest::new(read_dir, file_name, file_size).unwrap()) + }); +} + +fn new_file_manifest_benchmark_object_store(c: &mut Criterion) { + let store = black_box(Store::new("../example-file").unwrap()); + let file_name = black_box("0017234600.dbin.zst"); + let file_size = black_box(Some(CHUNK_SIZE as usize)); + + c.bench_function("new_file_manifest_benchmark_object_store", |b| { + b.to_async(FuturesExecutor) + .iter(|| store.file_manifest(file_name, file_size)) + }); +} + +criterion_group!( + benches, + new_file_manifest_benchmark_file_store, + new_file_manifest_benchmark_object_store +); +criterion_main!(benches); diff --git a/subfile-exchange/benches/read_and_validate_file.rs b/file-exchange/benches/read_and_validate_file.rs similarity index 56% rename from subfile-exchange/benches/read_and_validate_file.rs rename to file-exchange/benches/read_and_validate_file.rs index 0e6717c..788c7af 100644 --- a/subfile-exchange/benches/read_and_validate_file.rs +++ b/file-exchange/benches/read_and_validate_file.rs @@ -1,13 +1,13 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use subfile_exchange::test_util::simple_subfile; +use file_exchange::test_util::simple_bundle; fn read_and_validate_file_benchmark(c: &mut Criterion) { - let subfile = black_box(simple_subfile()); + let bundle = black_box(simple_bundle()); c.bench_function("read_and_validate_file", |b| { - let meta = black_box(subfile.chunk_files.first().unwrap()); - b.iter(|| subfile.read_and_validate_file(meta)) + let meta = black_box(bundle.file_manifests.first().unwrap()); + b.iter(|| bundle.read_and_validate_file(meta)) }); } diff --git a/subfile-exchange/benches/read_chunk.rs 
b/file-exchange/benches/read_chunk.rs similarity index 94% rename from subfile-exchange/benches/read_chunk.rs rename to file-exchange/benches/read_chunk.rs index e0b5cd5..00f429c 100644 --- a/subfile-exchange/benches/read_chunk.rs +++ b/file-exchange/benches/read_chunk.rs @@ -1,6 +1,6 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use file_exchange::manifest::file_reader::read_chunk; use std::{fs::File, path::PathBuf}; -use subfile_exchange::subfile::file_reader::read_chunk; fn read_chunk_benchmark(c: &mut Criterion) { let file_path = black_box(PathBuf::from("../example-file/0017234600.dbin.zst")); diff --git a/file-exchange/benches/validate_local.rs b/file-exchange/benches/validate_local.rs new file mode 100644 index 0000000..49ff8bc --- /dev/null +++ b/file-exchange/benches/validate_local.rs @@ -0,0 +1,15 @@ +use criterion::black_box; +use criterion::Criterion; + +use criterion::{criterion_group, criterion_main}; +use file_exchange::test_util::simple_bundle; + +fn validate_local_bundle_benchmark(c: &mut Criterion) { + let bundle = black_box(simple_bundle()); + c.bench_function("validate_local_bundle", |b| { + b.iter(|| bundle.validate_local_bundle()) + }); +} + +criterion_group!(benches, validate_local_bundle_benchmark); +criterion_main!(benches); diff --git a/subfile-exchange/benches/verify_chunk.rs b/file-exchange/benches/verify_chunk.rs similarity index 59% rename from subfile-exchange/benches/verify_chunk.rs rename to file-exchange/benches/verify_chunk.rs index ce3d437..56e6360 100644 --- a/subfile-exchange/benches/verify_chunk.rs +++ b/file-exchange/benches/verify_chunk.rs @@ -4,19 +4,19 @@ use criterion::black_box; use criterion::Criterion; use criterion::{criterion_group, criterion_main}; -use subfile_exchange::subfile::{file_hasher::verify_chunk, file_reader::read_chunk}; -use subfile_exchange::test_util::simple_chunk_file; +use file_exchange::manifest::{file_hasher::verify_chunk, file_reader::read_chunk}; +use file_exchange::test_util::simple_file_manifest; fn verify_chunk_benchmark(c: &mut Criterion) { let file_path = black_box(PathBuf::from("../example-file/0017234600.dbin.zst")); - let chunk_file = black_box(simple_chunk_file()); + let file_manifest = black_box(simple_file_manifest()); // read a chunk let (start, end) = black_box(( - chunk_file.total_bytes / chunk_file.chunk_size * chunk_file.chunk_size, - chunk_file.total_bytes - 1, + file_manifest.total_bytes / file_manifest.chunk_size * file_manifest.chunk_size, + file_manifest.total_bytes - 1, )); let data = black_box(read_chunk(&file_path, (start, end)).unwrap()); - let last_hash = black_box(chunk_file.chunk_hashes.last().unwrap()); + let last_hash = black_box(file_manifest.chunk_hashes.last().unwrap()); c.bench_function("verify_chunk", |b| { b.iter(|| verify_chunk(&data, last_hash)) diff --git a/subfile-exchange/build.rs b/file-exchange/build.rs similarity index 100% rename from subfile-exchange/build.rs rename to file-exchange/build.rs diff --git a/subfile-exchange/src/config.rs b/file-exchange/src/config.rs similarity index 93% rename from subfile-exchange/src/config.rs rename to file-exchange/src/config.rs index 5d1f458..047b8fe 100644 --- a/subfile-exchange/src/config.rs +++ b/file-exchange/src/config.rs @@ -12,8 +12,8 @@ use crate::util::parse_key; #[derive(Clone, Debug, Parser, Serialize, Deserialize)] #[command( - name = "subfile-exchange", - about = "A CLI for subfile exchanges", + name = "file-exchange", + about = "A CLI for file hosting exchanges", author = "hopeyen" )] 
#[command(author, version, about, long_about = None, arg_required_else_help = true)] @@ -112,7 +112,7 @@ pub struct DownloaderArgs { long, value_name = "IPFS_HASH", env = "IPFS_HASH", - help = "IPFS hash for the target subfile.yaml" + help = "IPFS hash for the target bundle.yaml" )] pub ipfs_hash: String, #[arg( @@ -178,8 +178,7 @@ pub struct DownloaderArgs { pub max_retry: u64, } -/// Publisher should take the files, generate subfiles, and publish to IPFS -/// Start with supporting a single file +/// Publisher takes the files, generate bundle manifest, and publish to IPFS //TODO: a single command to publish a range of files #[derive(Clone, Debug, Args, Serialize, Deserialize, Default)] #[group(required = false, multiple = true)] @@ -188,8 +187,8 @@ pub struct PublisherArgs { long, value_name = "YAML_STORE_DIR", env = "YAML_STORE_DIR", - default_value = "./example-file/subfile.yaml", - help = "Path to the directory to store the generated yaml file for subfile" + default_value = "./example-file/bundle.yaml", + help = "Path to the directory to store the generated yaml file for bundle" )] pub yaml_store: String, @@ -203,18 +202,18 @@ pub struct PublisherArgs { #[arg( long, - value_name = "SUBFILE_NAME", - env = "SUBFILE_NAME", - help = "Name for the subfile (later this can be interactive)" + value_name = "BUNDLE_NAME", + env = "BUNDLE_NAME", + help = "Name for the bundle (later this can be interactive)" )] - pub subfile_name: String, + pub bundle_name: String, #[arg( long, value_name = "FILE_NAMES", value_delimiter = ',', env = "FILE_NAMES", - help = "Name for the files to be included in subfile (later this can be interactive)" + help = "Name for the files to be included in bundle (later this can be interactive)" )] pub file_names: Vec, @@ -235,9 +234,9 @@ pub struct PublisherArgs { env = "FILE_VERSION", //TODO: use enum // value_parser = clap::value_parser!(FileType::from_str), - help = "Subfile specification versioning" + help = "Bundle versioning" )] - pub file_version: String, + pub bundle_version: String, #[arg( long, @@ -276,7 +275,7 @@ pub struct PublisherArgs { long, value_name = "PUBLISHER_URL", env = "PUBLISHER_URL", - help = "Self promoting endpoint to record inside the subfile (TODO: can update to be a github repository link)" + help = "Self promoting endpoint to record inside the bundle (TODO: can update to be a github repository link)" )] pub publisher_url: Option, @@ -285,7 +284,7 @@ pub struct PublisherArgs { value_name = "DESCRIPTION", env = "DESCRIPTION", default_value = "", - help = "Describe subfile content" + help = "Describe bundle content" )] pub description: String, diff --git a/subfile-exchange/src/subfile_finder/mod.rs b/file-exchange/src/discover/mod.rs similarity index 81% rename from subfile-exchange/src/subfile_finder/mod.rs rename to file-exchange/src/discover/mod.rs index fb4d189..2377aaf 100644 --- a/subfile-exchange/src/subfile_finder/mod.rs +++ b/file-exchange/src/discover/mod.rs @@ -10,46 +10,46 @@ use tokio::sync::Mutex; use crate::errors::Error; -use crate::subfile::{ +use crate::manifest::{ ipfs::IpfsClient, - subfile_reader::{fetch_subfile_from_ipfs, read_subfile}, + manifest_fetcher::{fetch_bundle_from_ipfs, read_bundle}, }; // Pair indexer operator address and indexer service endpoint (operator, indexer_url) // persumeably this should not be handled by clients themselves //TODO: smarter type for tracking available endpoints pub type IndexerEndpoint = (String, String); -// Pair HashMap< ChunkFileIPFS, HashMap< IndexerEndpoint, Vec< MatchedSubfileIPFS > 
> > +// Pair HashMap< FileManifestIPFS, HashMap< IndexerEndpoint, Vec< MatchedManifestIPFS > > > pub type FileAvailbilityMap = Arc>>>>>>; -pub struct SubfileFinder { +pub struct Finder { ipfs_client: IpfsClient, http_client: reqwest::Client, } -impl SubfileFinder { +impl Finder { pub fn new(ipfs_client: IpfsClient) -> Self { - SubfileFinder { + Finder { ipfs_client, http_client: reqwest::Client::new(), } } /// Endpoint must serve operator info and the requested file - async fn subfile_availability( + async fn bundle_availability( &self, - subfile_hash: &str, + bundle_hash: &str, url: &str, ) -> Result { let files = self.indexer_status(url).await?; let operator: String = self.indexer_operator(url).await?; - if !files.contains(&subfile_hash.to_string()) { + if !files.contains(&bundle_hash.to_string()) { tracing::trace!( url, files = tracing::field::debug(&files), - "IPFS hash not found in served subfile status" + "IPFS hash not found in served bundle status" ); return Err(Error::DataUnavilable(format!( "IPFS hash not found in files served at {}", @@ -60,18 +60,18 @@ impl SubfileFinder { Ok((operator, url.to_string())) } - /// Check the availability of a subfile at various indexer endpoints - /// Return a list of endpoints where the desired subfile is hosted - pub async fn subfile_availabilities( + /// Check the availability of a bundle at various indexer endpoints + /// Return a list of endpoints where the desired bundle is hosted + pub async fn bundle_availabilities( &self, - subfile_hash: &str, + bundle_hash: &str, endpoint_checklist: &[String], ) -> Vec { - tracing::debug!(subfile_hash, "Checking availability"); + tracing::debug!(bundle_hash, "Checking availability"); // Use a stream to process the endpoints in parallel let results = stream::iter(endpoint_checklist) - .map(|url| self.subfile_availability(subfile_hash, url)) + .map(|url| self.bundle_availability(bundle_hash, url)) .buffer_unordered(endpoint_checklist.len()) // Parallelize up to the number of endpoints .collect::>>() .await; @@ -89,18 +89,18 @@ impl SubfileFinder { pub async fn file_discovery( &self, - subfile_hash: &str, + bundle_hash: &str, endpoint_checklist: &[String], ) -> Result { - let subfile = read_subfile(&self.ipfs_client, subfile_hash, PathBuf::new()).await?; - // To fill in availability for each file, get a vector of (IndexerEndpoint, SubfileIPFS) that serves the file + let bundle = read_bundle(&self.ipfs_client, bundle_hash, PathBuf::new()).await?; + // To fill in availability for each file, get a vector of (IndexerEndpoint, ManifestIPFS) that serves the file let target_hashes: FileAvailbilityMap = Arc::new(Mutex::new( - subfile - .chunk_files + bundle + .file_manifests .iter() - .map(|chunk_file| { + .map(|file_manifest| { ( - chunk_file.meta_info.hash.clone(), + file_manifest.meta_info.hash.clone(), Arc::new(Mutex::new(HashMap::new())), ) }) @@ -125,12 +125,12 @@ impl SubfileFinder { ) -> Result<(), Error> { let operator = self.indexer_operator(url).await?; let indexer_endpoint = (operator, url.to_string()); - let subfiles = self.indexer_status(url).await?; + let bundles = self.indexer_status(url).await?; - // Map of indexer_endpoints to served Subfiles + // Map of indexer_endpoints to served manifests // For each endpoint, populate indexer_map with the available files - for subfile in subfiles { - let file_hashes: Vec = fetch_subfile_from_ipfs(&self.ipfs_client, &subfile) + for bundle in bundles { + let file_hashes: Vec = fetch_bundle_from_ipfs(&self.ipfs_client, &bundle) .await? 
.files .iter() @@ -138,14 +138,14 @@ impl SubfileFinder { .collect(); let file_map_lock = file_map.lock().await; for (target_file, availability_map) in file_map_lock.iter() { - // Record serving indexer and subfile for each target file + // Record serving indexer and bundle for each target file if file_hashes.contains(target_file) { availability_map .lock() .await .entry(indexer_endpoint.clone()) - .and_modify(|e| e.push(subfile.clone())) - .or_insert(vec![subfile.clone()]); + .and_modify(|e| e.push(bundle.clone())) + .or_insert(vec![bundle.clone()]); } } } diff --git a/subfile-exchange/src/subfile_client/mod.rs b/file-exchange/src/download_client/mod.rs similarity index 82% rename from subfile-exchange/src/subfile_client/mod.rs rename to file-exchange/src/download_client/mod.rs index 2587b39..398cacc 100644 --- a/subfile-exchange/src/subfile_client/mod.rs +++ b/file-exchange/src/download_client/mod.rs @@ -16,12 +16,12 @@ use std::time::Duration; use tokio::sync::Mutex; use crate::config::DownloaderArgs; +use crate::discover::{Finder, IndexerEndpoint}; use crate::errors::Error; -use crate::subfile::{ - file_hasher::verify_chunk, ipfs::IpfsClient, subfile_reader::read_subfile, ChunkFileMeta, - Subfile, +use crate::manifest::{ + file_hasher::verify_chunk, ipfs::IpfsClient, manifest_fetcher::read_bundle, Bundle, + FileManifestMeta, }; -use crate::subfile_finder::{IndexerEndpoint, SubfileFinder}; use crate::util::build_wallet; @@ -29,33 +29,33 @@ use self::signer::{ReceiptSigner, TapReceipt}; pub mod signer; -pub struct SubfileDownloader { +pub struct Downloader { http_client: reqwest::Client, ipfs_hash: String, - subfile: Subfile, + bundle: Bundle, _gateway_url: Option, static_endpoints: Vec, output_dir: String, free_query_auth_token: Option, indexer_urls: Arc>>, indexer_blocklist: Arc>>, - // key is the chunk file identifier (IPFS hash) and value is a HashSet of downloaded chunk indices + // key is the file manifest identifier (IPFS hash) and value is a HashSet of downloaded chunk indices target_chunks: Arc>>>, chunk_max_retry: u64, - subfile_finder: SubfileFinder, + bundle_finder: Finder, #[allow(dead_code)] receipt_signer: ReceiptSigner, } -impl SubfileDownloader { +impl Downloader { pub async fn new(ipfs_client: IpfsClient, args: DownloaderArgs) -> Self { - let subfile = read_subfile( + let bundle = read_bundle( &ipfs_client, &args.ipfs_hash, args.output_dir.clone().into(), ) .await - .expect("Read subfile"); + .expect("Read bundle"); let wallet = build_wallet(&args.mnemonic).expect("Mnemonic build wallet"); let signing_key = wallet.signer().to_bytes(); @@ -77,10 +77,10 @@ impl SubfileDownloader { ) .await; - SubfileDownloader { + Downloader { http_client: reqwest::Client::new(), ipfs_hash: args.ipfs_hash, - subfile, + bundle, _gateway_url: args.gateway_url, static_endpoints: args.indexer_endpoints, output_dir: args.output_dir, @@ -89,7 +89,7 @@ impl SubfileDownloader { indexer_blocklist: Arc::new(StdMutex::new(HashSet::new())), target_chunks: Arc::new(StdMutex::new(HashMap::new())), chunk_max_retry: args.max_retry, - subfile_finder: SubfileFinder::new(ipfs_client), + bundle_finder: Finder::new(ipfs_client), receipt_signer, } } @@ -108,69 +108,69 @@ impl SubfileDownloader { } /// Read manifest to prepare chunks download - pub fn target_chunks(&self, subfile: &Subfile) { - for chunk_file_meta in &subfile.chunk_files { + pub fn target_chunks(&self, bundle: &Bundle) { + for file_manifest_meta in &bundle.file_manifests { let mut target_chunks = self.target_chunks.lock().unwrap(); let 
chunks_set = target_chunks - .entry(chunk_file_meta.meta_info.hash.clone()) + .entry(file_manifest_meta.meta_info.hash.clone()) .or_default(); - let chunk_size = chunk_file_meta.chunk_file.chunk_size; - for i in 0..(chunk_file_meta.chunk_file.total_bytes / chunk_size + 1) { + let chunk_size = file_manifest_meta.file_manifest.chunk_size; + for i in 0..(file_manifest_meta.file_manifest.total_bytes / chunk_size + 1) { chunks_set.insert(i); } } } - /// Read subfile manifiest and download the individual chunk files + /// Read bundle manifiest and download the individual file manifests //TODO: update once there is payment - pub async fn download_subfile(&self) -> Result<(), Error> { - self.target_chunks(&self.subfile); + pub async fn download_bundle(&self) -> Result<(), Error> { + self.target_chunks(&self.bundle); tracing::info!( chunks = tracing::field::debug(self.target_chunks.clone()), - "Chunk files download starting" + "File manifests download starting" ); - // check subfile availability from gateway/indexer_endpoints + // check bundle availability from gateway/indexer_endpoints let _ = self.availbility_check().await; - // Loop through chunk files for downloading + // Loop through file manifests for downloading let mut incomplete_files = vec![]; - for chunk_file in &self.subfile.chunk_files { - if let Err(e) = self.download_chunk_file(chunk_file.clone()).await { + for file_manifest in &self.bundle.file_manifests { + if let Err(e) = self.download_file_manifest(file_manifest.clone()).await { incomplete_files.push(e); } } - //TODO: retry for failed subfiles + //TODO: retry for failed bundles if !incomplete_files.is_empty() { let msg = format!( - "Chunk files download incomplete: {:#?}", + "File manifests download incomplete: {:#?}", tracing::field::debug(&incomplete_files), ); tracing::warn!(msg); return Err(Error::DataUnavilable(msg)); } else { - tracing::info!("Chunk files download completed"); + tracing::info!("File manifests download completed"); } Ok(()) } /// Get the remaining chunks to download for a file - pub fn remaining_chunks(&self, chunk_file_hash: &String) -> Vec { + pub fn remaining_chunks(&self, file_manifest_hash: &String) -> Vec { self.target_chunks .lock() .unwrap() - .get(chunk_file_hash) + .get(file_manifest_hash) .map(|chunks| chunks.clone().into_iter().collect()) .unwrap_or_default() } /// Download a file by reading its chunk manifest //TODO: update once there is payment - pub async fn download_chunk_file(&self, meta: ChunkFileMeta) -> Result<(), Error> { + pub async fn download_file_manifest(&self, meta: FileManifestMeta) -> Result<(), Error> { tracing::debug!( file_spec = tracing::field::debug(&meta), - "Download chunk file" + "Download file manifest" ); // Open the output file @@ -184,7 +184,7 @@ impl SubfileDownloader { // Wait for all chunk tasks to complete and collect the results let mut handles = Vec::new(); for i in self.remaining_chunks(&meta.meta_info.hash) { - let chunk_file_hash = meta.meta_info.hash.clone(); + let file_manifest_hash = meta.meta_info.hash.clone(); let client = self.http_client.clone(); //TODO: can utilize operator address for on-chain checks let request = self.download_range_request(&meta, i, file.clone())?; @@ -200,7 +200,7 @@ impl SubfileDownloader { target_chunks .lock() .unwrap() - .entry(chunk_file_hash) + .entry(file_manifest_hash) .or_default() .remove(&i); tracing::trace!(i, "Chunk downloaded"); @@ -214,7 +214,7 @@ impl SubfileDownloader { }; tracing::warn!( err = e.to_string(), - "Chunk file download incomplete" + "File manifest 
download incomplete" ); block_list .lock() @@ -246,7 +246,7 @@ impl SubfileDownloader { /// Generate a request to download a chunk fn download_range_request( &self, - meta: &ChunkFileMeta, + meta: &FileManifestMeta, i: u64, file: Arc>, ) -> Result { @@ -257,7 +257,7 @@ impl SubfileDownloader { operator, url, chunk = i, - chunk_file = meta.meta_info.hash, + file_manifest = meta.meta_info.hash, "Querying operator" ); url.clone() @@ -266,15 +266,15 @@ impl SubfileDownloader { tracing::warn!(err_msg); return Err(Error::DataUnavilable(err_msg.to_string())); }; - //TODO: do no add ipfs_hash here, construct query_endpoint after updating route 'subfiles/id/:id' - let query_endpoint = url + "/subfiles/id/" + &self.ipfs_hash; + //TODO: do no add ipfs_hash here, construct query_endpoint after updating route 'bundles/id/:id' + let query_endpoint = url + "/bundles/id/" + &self.ipfs_hash; let file_hash = meta.meta_info.hash.clone(); - let start = i * meta.chunk_file.chunk_size; + let start = i * meta.file_manifest.chunk_size; let end = u64::min( - start + meta.chunk_file.chunk_size, - meta.chunk_file.total_bytes, + start + meta.file_manifest.chunk_size, + meta.file_manifest.total_bytes, ) - 1; - let chunk_hash = meta.chunk_file.chunk_hashes[i as usize].clone(); + let chunk_hash = meta.file_manifest.chunk_hashes[i as usize].clone(); Ok(DownloadRangeRequest { query_endpoint, @@ -299,26 +299,26 @@ impl SubfileDownloader { .collect::>(); self.update_indexer_urls( &self - .subfile_finder - .subfile_availabilities(&self.ipfs_hash, endpoints) + .bundle_finder + .bundle_availabilities(&self.ipfs_hash, endpoints) .await, ); let indexer_endpoints = self.indexer_urls.lock().unwrap().clone(); if indexer_endpoints.is_empty() { tracing::warn!( - subfile_hash = &self.ipfs_hash, - "No endpoint satisfy the subfile requested, sieve through available subfiles for individual files" + bundle_hash = &self.ipfs_hash, + "No endpoint satisfy the bundle requested, sieve through available bundles for individual files" ); // check files availability from gateway/indexer_endpoints match self - .subfile_finder + .bundle_finder .file_discovery(&self.ipfs_hash, endpoints) .await { Ok(map) => { let msg = format!( - "Files available on these available subfiles: {:#?}", + "Files available on these available bundles: {:#?}", tracing::field::debug(&map.lock().await), ); return Err(Error::DataUnavilable(msg)); @@ -408,7 +408,7 @@ async fn download_chunk_and_write_to_file( } } -/// Make range request for a file to the subfile server +/// Make range request for a file to the bundle server async fn request_chunk( http_client: &Client, query_endpoint: &str, @@ -457,9 +457,9 @@ async fn request_chunk( } } -/// extract base indexer_url from `indexer_url/subfiles/id/subfile_id` +/// extract base indexer_url from `indexer_url/bundles/id/bundle_id` fn extract_base_url(query_endpoint: &str) -> Option<&str> { - if let Some(index) = query_endpoint.find("/subfiles/id/") { + if let Some(index) = query_endpoint.find("/bundles/id/") { Some(&query_endpoint[..index]) } else { None diff --git a/subfile-exchange/src/subfile_client/signer.rs b/file-exchange/src/download_client/signer.rs similarity index 100% rename from subfile-exchange/src/subfile_client/signer.rs rename to file-exchange/src/download_client/signer.rs diff --git a/subfile-exchange/src/errors.rs b/file-exchange/src/errors.rs similarity index 96% rename from subfile-exchange/src/errors.rs rename to file-exchange/src/errors.rs index 4c177b9..efe4b58 100644 --- a/subfile-exchange/src/errors.rs +++ 
b/file-exchange/src/errors.rs @@ -6,7 +6,7 @@ pub enum Error { FileIOError(std::io::Error), InvalidRange(String), IPFSError(reqwest::Error), - SubfileError(String), + ManifestError(String), Request(reqwest::Error), DataUnavilable(String), ChunkInvalid(String), @@ -26,7 +26,7 @@ impl fmt::Display for Error { Error::FileIOError(ref err) => write!(f, "File IO error: {}", err), Error::InvalidRange(ref msg) => write!(f, "Invalid range: {}", msg), Error::IPFSError(ref err) => write!(f, "IPFS error: {}", err), - Error::SubfileError(ref msg) => write!(f, "Subfile error: {}", msg), + Error::ManifestError(ref msg) => write!(f, "Manifest error: {}", msg), Error::Request(ref err) => write!(f, "Client error: {}", err), Error::DataUnavilable(ref err) => write!(f, "Client error: {}", err), Error::ChunkInvalid(ref err) => write!(f, "Chunk invalid error: {}", err), diff --git a/subfile-exchange/src/lib.rs b/file-exchange/src/lib.rs similarity index 63% rename from subfile-exchange/src/lib.rs rename to file-exchange/src/lib.rs index 52135eb..9899341 100644 --- a/subfile-exchange/src/lib.rs +++ b/file-exchange/src/lib.rs @@ -1,9 +1,9 @@ pub mod config; +pub mod discover; +pub mod download_client; pub mod errors; +pub mod manifest; pub mod publisher; -pub mod subfile; -pub mod subfile_client; -pub mod subfile_finder; pub mod test_util; pub mod transaction_manager; pub mod util; diff --git a/subfile-exchange/src/main.rs b/file-exchange/src/main.rs similarity index 89% rename from subfile-exchange/src/main.rs rename to file-exchange/src/main.rs index 06d4508..ff3c551 100644 --- a/subfile-exchange/src/main.rs +++ b/file-exchange/src/main.rs @@ -1,10 +1,10 @@ use dotenv::dotenv; -use subfile_exchange::{ +use file_exchange::{ config::{Cli, OnchainAction, Role}, - publisher::SubfilePublisher, - subfile::ipfs::IpfsClient, - subfile_client::SubfileDownloader, + download_client::Downloader, + manifest::ipfs::IpfsClient, + publisher::ManifestPublisher, transaction_manager::TransactionManager, }; @@ -28,10 +28,10 @@ async fn main() { "Downloader request" ); // Create client - let downloader = SubfileDownloader::new(client, config).await; + let downloader = Downloader::new(client, config).await; // Send range request - match downloader.download_subfile().await { + match downloader.download_bundle().await { Ok(res) => { tracing::info!("Download result: {:#?}", res); } @@ -43,7 +43,7 @@ async fn main() { Role::Publisher(config) => { tracing::info!(config = tracing::field::debug(&config), "Publisher request"); - let publisher = SubfilePublisher::new(client, config); + let publisher = ManifestPublisher::new(client, config); match publisher.publish().await { Ok(r) => { diff --git a/subfile-exchange/src/subfile/file_hasher.rs b/file-exchange/src/manifest/file_hasher.rs similarity index 78% rename from subfile-exchange/src/subfile/file_hasher.rs rename to file-exchange/src/manifest/file_hasher.rs index c4bf2b1..2a1c3e5 100644 --- a/subfile-exchange/src/subfile/file_hasher.rs +++ b/file-exchange/src/manifest/file_hasher.rs @@ -49,7 +49,7 @@ pub fn verify_chunk(data: &Bytes, chunk_hash: &str) -> bool { #[cfg(test)] mod tests { - use crate::{subfile::file_reader::chunk_file, subfile::ChunkFile, test_util::*}; + use crate::{manifest::file_reader::chunk_file, manifest::FileManifest, test_util::*}; use std::path::Path; #[test] @@ -69,11 +69,11 @@ mod tests { let file_name1 = path1.file_name().unwrap().to_str().unwrap(); let file_name2 = path2.file_name().unwrap().to_str().unwrap(); - // produce the same chunk file - let chunk_file1 = 
ChunkFile::new(readdir1, file_name1, CHUNK_SIZE).unwrap(); - let chunk_file2 = ChunkFile::new(readdir2, file_name2, CHUNK_SIZE).unwrap(); + // produce the same file manifest + let file_manifest1 = FileManifest::new(readdir1, file_name1, CHUNK_SIZE).unwrap(); + let file_manifest2 = FileManifest::new(readdir2, file_name2, CHUNK_SIZE).unwrap(); - assert_eq!(chunk_file1.chunk_hashes, chunk_file2.chunk_hashes); + assert_eq!(file_manifest1.chunk_hashes, file_manifest2.chunk_hashes); // Clean up drop(temp_file1); @@ -94,11 +94,11 @@ mod tests { let file_name1 = path1.file_name().unwrap().to_str().unwrap(); let file_name2 = path2.file_name().unwrap().to_str().unwrap(); - // produce different chunk file - let chunk_file1 = ChunkFile::new(readdir1, file_name1, CHUNK_SIZE).unwrap(); - let chunk_file2 = ChunkFile::new(readdir2, file_name2, CHUNK_SIZE).unwrap(); + // produce different file manifest + let file_manifest1 = FileManifest::new(readdir1, file_name1, CHUNK_SIZE).unwrap(); + let file_manifest2 = FileManifest::new(readdir2, file_name2, CHUNK_SIZE).unwrap(); - assert_ne!(chunk_file1.chunk_hashes, chunk_file2.chunk_hashes); + assert_ne!(file_manifest1.chunk_hashes, file_manifest2.chunk_hashes); // Clean up drop(temp_file1); @@ -114,11 +114,11 @@ mod tests { let readdir = path.parent().unwrap().to_str().unwrap(); let file_name = path.file_name().unwrap().to_str().unwrap(); - // produce the same chunk file - let chunk_file1 = ChunkFile::new(readdir, file_name, file_size).unwrap(); - let chunk_file2 = ChunkFile::new(readdir, file_name, file_size).unwrap(); + // produce the same file manifest + let file_manifest1 = FileManifest::new(readdir, file_name, file_size).unwrap(); + let file_manifest2 = FileManifest::new(readdir, file_name, file_size).unwrap(); - assert_eq!(chunk_file1, chunk_file2); + assert_eq!(file_manifest1, file_manifest2); // Clean up drop(temp_file1); @@ -143,11 +143,11 @@ mod tests { let readdir2 = path2.parent().unwrap().to_str().unwrap(); let file_name2 = path2.file_name().unwrap().to_str().unwrap(); - // produce different chunk file - let chunk_file1 = ChunkFile::new(readdir1, file_name1, file_size).unwrap(); - let chunk_file2 = ChunkFile::new(readdir2, file_name2, file_size).unwrap(); + // produce different file manifest + let file_manifest1 = FileManifest::new(readdir1, file_name1, file_size).unwrap(); + let file_manifest2 = FileManifest::new(readdir2, file_name2, file_size).unwrap(); - assert_ne!(chunk_file1.chunk_hashes, chunk_file2.chunk_hashes); + assert_ne!(file_manifest1.chunk_hashes, file_manifest2.chunk_hashes); // Clean up drop(temp_file1); diff --git a/subfile-exchange/src/subfile/file_reader.rs b/file-exchange/src/manifest/file_reader.rs similarity index 100% rename from subfile-exchange/src/subfile/file_reader.rs rename to file-exchange/src/manifest/file_reader.rs diff --git a/subfile-exchange/src/subfile/ipfs.rs b/file-exchange/src/manifest/ipfs.rs similarity index 100% rename from subfile-exchange/src/subfile/ipfs.rs rename to file-exchange/src/manifest/ipfs.rs diff --git a/subfile-exchange/src/subfile/local_file_system.rs b/file-exchange/src/manifest/local_file_system.rs similarity index 97% rename from subfile-exchange/src/subfile/local_file_system.rs rename to file-exchange/src/manifest/local_file_system.rs index e6235bc..5f0a8c1 100644 --- a/subfile-exchange/src/subfile/local_file_system.rs +++ b/file-exchange/src/manifest/local_file_system.rs @@ -12,10 +12,10 @@ use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; -use crate::subfile::Error; +use 
crate::manifest::Error; use super::file_hasher::hash_chunk; -use super::ChunkFile; +use super::FileManifest; pub struct Store { local_file_system: Arc, @@ -159,11 +159,11 @@ impl Store { .map_err(Error::ObjectStoreError) } - pub async fn chunk_file( + pub async fn file_manifest( &self, location: &str, chunk_size: Option, - ) -> Result { + ) -> Result { let parts = self.multipart_read(location, chunk_size).await?; let total_bytes = parts.iter().map(|b| b.len() as u64).sum(); let byte_size_used = parts @@ -175,7 +175,7 @@ impl Store { .len(); let chunk_hashes = parts.iter().map(|c| hash_chunk(c)).collect(); - Ok(ChunkFile { + Ok(FileManifest { total_bytes, chunk_size: byte_size_used as u64, chunk_hashes, @@ -188,7 +188,7 @@ mod tests { use rand::{distributions::DistString, thread_rng}; use crate::{ - subfile::local_file_system::*, + manifest::local_file_system::*, test_util::{create_random_temp_file, CHUNK_SIZE}, }; diff --git a/subfile-exchange/src/subfile/subfile_reader.rs b/file-exchange/src/manifest/manifest_fetcher.rs similarity index 56% rename from subfile-exchange/src/subfile/subfile_reader.rs rename to file-exchange/src/manifest/manifest_fetcher.rs index debe2a0..275dee4 100644 --- a/subfile-exchange/src/subfile/subfile_reader.rs +++ b/file-exchange/src/manifest/manifest_fetcher.rs @@ -4,12 +4,12 @@ use serde::de::DeserializeOwned; use crate::{ errors::Error, - subfile::ipfs::IpfsClient, - subfile::{ChunkFile, ChunkFileMeta, Subfile, SubfileManifest}, + manifest::ipfs::IpfsClient, + manifest::{Bundle, BundleManifest, FileManifest, FileManifestMeta}, }; -/// Parse yaml into Subfile manifest -pub fn parse_subfile_manifest(yaml: serde_yaml::Value) -> Result { +/// Parse yaml into Bundle manifest +pub fn parse_bundle_manifest(yaml: serde_yaml::Value) -> Result { serde_yaml::from_value(yaml).map_err(Error::YamlError) } @@ -18,11 +18,11 @@ pub fn parse_yaml(yaml: serde_yaml::Value) -> Result Result { +) -> Result { // Fetch the content from IPFS let timeout = Duration::from_secs(10); @@ -32,7 +32,7 @@ pub async fn fetch_subfile_from_ipfs( .map_err(Error::IPFSError)?; let content: serde_yaml::Value = serde_yaml::from_str( - &String::from_utf8(file_bytes.to_vec()).map_err(|e| Error::SubfileError(e.to_string()))?, + &String::from_utf8(file_bytes.to_vec()).map_err(|e| Error::ManifestError(e.to_string()))?, ) .map_err(Error::YamlError)?; @@ -41,27 +41,24 @@ pub async fn fetch_subfile_from_ipfs( "Read file content" ); - let subfile = parse_subfile_manifest(content)?; + let bundle = parse_bundle_manifest(content)?; - tracing::debug!( - subfile = tracing::field::debug(&subfile), - "subfile manifest" - ); + tracing::debug!(bundle = tracing::field::debug(&bundle), "bundle manifest"); - Ok(subfile) + Ok(bundle) } -/// Parse yaml into a chunk file -pub fn parse_chunk_file(yaml: serde_yaml::Value) -> Result { +/// Parse yaml into a file manifest +pub fn parse_file_manifest(yaml: serde_yaml::Value) -> Result { serde_yaml::from_value(yaml).map_err(Error::YamlError) } -// Fetch subfile yaml from IPFS -pub async fn fetch_chunk_file_from_ipfs( +// Fetch file manifest yaml from IPFS +pub async fn fetch_file_manifest_from_ipfs( client: &IpfsClient, ipfs_hash: &str, -) -> Result { - tracing::debug!(ipfs_hash, "Fetch chunk file from IPFS"); +) -> Result { + tracing::debug!(ipfs_hash, "Fetch file manifest from IPFS"); // Fetch the content from IPFS let timeout = Duration::from_secs(10); @@ -71,7 +68,7 @@ pub async fn fetch_chunk_file_from_ipfs( .map_err(Error::IPFSError)?; let content: serde_yaml::Value = 
serde_yaml::from_str( - &String::from_utf8(file_bytes.to_vec()).map_err(|e| Error::SubfileError(e.to_string()))?, + &String::from_utf8(file_bytes.to_vec()).map_err(|e| Error::ManifestError(e.to_string()))?, ) .map_err(Error::YamlError)?; @@ -80,34 +77,34 @@ pub async fn fetch_chunk_file_from_ipfs( "Read file content" ); - let chunk_file = parse_chunk_file(content)?; + let file_manifest = parse_file_manifest(content)?; - Ok(chunk_file) + Ok(file_manifest) } -/// Read subfile from IPFS, build a version relative to local access -pub async fn read_subfile( +/// Read bundle from IPFS, build a version relative to local access +pub async fn read_bundle( client: &IpfsClient, ipfs: &str, local_path: PathBuf, -) -> Result { - let manifest = fetch_subfile_from_ipfs(client, ipfs).await?; +) -> Result { + let manifest = fetch_bundle_from_ipfs(client, ipfs).await?; // Get and Parse the YAML file to get chunk hashes - let mut chunk_files = vec![]; + let mut file_manifests = vec![]; for file_info in &manifest.files { - let chunk_file = fetch_chunk_file_from_ipfs(client, &file_info.hash).await?; + let file_manifest = fetch_file_manifest_from_ipfs(client, &file_info.hash).await?; - chunk_files.push(ChunkFileMeta { + file_manifests.push(FileManifestMeta { meta_info: file_info.clone(), - chunk_file, + file_manifest, }); } - Ok(Subfile { + Ok(Bundle { ipfs_hash: ipfs.to_string(), local_path, manifest, - chunk_files, + file_manifests, }) } diff --git a/subfile-exchange/src/subfile/mod.rs b/file-exchange/src/manifest/mod.rs similarity index 65% rename from subfile-exchange/src/subfile/mod.rs rename to file-exchange/src/manifest/mod.rs index e611f66..b323ddd 100644 --- a/subfile-exchange/src/subfile/mod.rs +++ b/file-exchange/src/manifest/mod.rs @@ -2,7 +2,7 @@ pub mod file_hasher; pub mod file_reader; pub mod ipfs; pub mod local_file_system; -pub mod subfile_reader; +pub mod manifest_fetcher; use std::{ path::{Path, PathBuf}, @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize}; use crate::{ errors::Error, - subfile::{ + manifest::{ file_hasher::{hash_chunk, verify_chunk}, file_reader::{chunk_file, format_path, read_chunk}, ipfs::is_valid_ipfs_hash, @@ -22,9 +22,9 @@ use crate::{ /* Public Manifests */ -/// Better mapping of files and chunk files +/// Better mapping of files and file manifests #[derive(Serialize, Deserialize, Clone, Debug)] -pub struct SubfileManifest { +pub struct BundleManifest { pub files: Vec, pub file_type: String, pub spec_version: String, @@ -43,16 +43,16 @@ pub struct FileMetaInfo { // pub block_range: BlockRange, } -/* Chunk file */ +/* File manifest */ #[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone)] -pub struct ChunkFile { +pub struct FileManifest { pub total_bytes: u64, pub chunk_size: u64, pub chunk_hashes: Vec, } -impl ChunkFile { - pub fn new(read_dir: &str, file_name: &str, chunk_size: u64) -> Result { +impl FileManifest { + pub fn new(read_dir: &str, file_name: &str, chunk_size: u64) -> Result { let file_path = format_path(read_dir, file_name); // let merkle_root = hex::encode(merkle_tree.root()); // let chunk_hashes: Vec = merkle_tree.nodes().iter().map(hex::encode).collect(); @@ -60,7 +60,7 @@ impl ChunkFile { let chunk_hashes: Vec = chunks.iter().map(|c| hash_chunk(c)).collect(); - Ok(ChunkFile { + Ok(FileManifest { total_bytes, chunk_size, chunk_hashes, @@ -69,20 +69,20 @@ impl ChunkFile { } #[derive(Debug, serde::Serialize, serde::Deserialize, Eq, PartialEq, Clone)] -pub struct ChunkFileMeta { +pub struct FileManifestMeta { pub meta_info: FileMetaInfo, - pub 
chunk_file: ChunkFile, + pub file_manifest: FileManifest, } -/* Subfile - packaging of chunk files mapped into local files */ +/* Bundle - packaging of file manifests mapped into local files */ //TODO: Add GraphQL derivation #[derive(Clone, Debug, Serialize, Deserialize)] -pub struct Subfile { +pub struct Bundle { pub ipfs_hash: String, pub local_path: PathBuf, - pub manifest: SubfileManifest, - /// IPFS hash, Chunk file spec - pub chunk_files: Vec, + pub manifest: BundleManifest, + /// IPFS hash, File manifest spec + pub file_manifests: Vec, } #[derive(Clone, Debug, Serialize, Deserialize)] @@ -110,16 +110,16 @@ pub struct BlockRange { // } // } -impl Subfile { - /// Validate the local files against a given subfile specification - pub fn validate_local_subfile(&self) -> Result<&Self, Error> { +impl Bundle { + /// Validate the local files against a given bundle specification + pub fn validate_local_bundle(&self) -> Result<&Self, Error> { tracing::debug!( - subfile = tracing::field::debug(self), - "Read and verify subfile" + bundle = tracing::field::debug(self), + "Read and verify bundle" ); - // Read all files in subfile to verify locally. This may cause a long initialization time - for file_meta in &self.chunk_files { + // Read all files in bundle to verify locally. This may cause a long initialization time + for file_meta in &self.file_manifests { self.read_and_validate_file(file_meta)?; } @@ -128,30 +128,30 @@ impl Subfile { } /// Read and validate file - pub fn read_and_validate_file(&self, file: &ChunkFileMeta) -> Result<(), Error> { - // read file by chunk_file.file_name + pub fn read_and_validate_file(&self, file: &FileManifestMeta) -> Result<(), Error> { + // read file by file_manifest.file_name let meta_info = &file.meta_info; - let chunk_file = &file.chunk_file; + let file_manifest = &file.file_manifest; let mut file_path = self.local_path.clone(); file_path.push(meta_info.name.clone()); tracing::trace!( file_path = tracing::field::debug(&file_path), - chunk_file = tracing::field::debug(&chunk_file), + file_manifest = tracing::field::debug(&file_manifest), "Verify file" ); - // loop through chunk file byte range - for i in 0..(chunk_file.total_bytes / chunk_file.chunk_size + 1) { + // loop through file manifest byte range + for i in 0..(file_manifest.total_bytes / file_manifest.chunk_size + 1) { // read range - let start = i * chunk_file.chunk_size; - let end = u64::min(start + chunk_file.chunk_size, chunk_file.total_bytes) - 1; + let start = i * file_manifest.chunk_size; + let end = u64::min(start + file_manifest.chunk_size, file_manifest.total_bytes) - 1; tracing::trace!( i, start_byte = tracing::field::debug(&start), end_byte = tracing::field::debug(&end), "Verify chunk index" ); - let chunk_hash = chunk_file.chunk_hashes[i as usize].clone(); + let chunk_hash = file_manifest.chunk_hashes[i as usize].clone(); // read chunk let chunk_data = read_chunk(&file_path, (start, end))?; @@ -173,19 +173,19 @@ impl Subfile { } } -/// Validate the subfile configurations at initialization -pub fn validate_subfile_entries(entries: Vec) -> Result, Error> { +/// Validate the bundle configurations at initialization +pub fn validate_bundle_entries(entries: Vec) -> Result, Error> { let mut results = Vec::new(); for entry in entries { - results.push(validate_subfile_entry(entry)?); + results.push(validate_bundle_entry(entry)?); } Ok(results) } -/// Subfile entry must be in the format of "valid_ipfs_hash:valid_local_path" -pub fn validate_subfile_entry(entry: String) -> Result<(String, PathBuf), 
Error> { +/// Bundle entry must be in the format of "valid_ipfs_hash:valid_local_path" +pub fn validate_bundle_entry(entry: String) -> Result<(String, PathBuf), Error> { let parts: Vec<&str> = entry.split(':').collect(); if parts.len() != 2 { return Err(Error::InvalidConfig(format!( @@ -218,39 +218,39 @@ pub fn validate_subfile_entry(entry: String) -> Result<(String, PathBuf), Error> #[cfg(test)] mod tests { - use crate::test_util::simple_subfile; + use crate::test_util::simple_bundle; #[test] fn test_read_and_validate_file() { - let mut subfile = simple_subfile(); - let file_meta = subfile.chunk_files.first().unwrap(); - let result = subfile.read_and_validate_file(file_meta); + let mut bundle = simple_bundle(); + let file_meta = bundle.file_manifests.first().unwrap(); + let result = bundle.read_and_validate_file(file_meta); assert!(result.is_ok()); // Add tests for failure cases - if let Some(file_meta) = subfile.chunk_files.first_mut() { - if let Some(first_hash) = file_meta.chunk_file.chunk_hashes.first_mut() { + if let Some(file_meta) = bundle.file_manifests.first_mut() { + if let Some(first_hash) = file_meta.file_manifest.chunk_hashes.first_mut() { *first_hash += "1"; } } - let file_meta = subfile.chunk_files.first().unwrap(); - let result = subfile.read_and_validate_file(file_meta); + let file_meta = bundle.file_manifests.first().unwrap(); + let result = bundle.read_and_validate_file(file_meta); assert!(result.is_err()); } #[test] - fn test_validate_local_subfile() { - let mut subfile = simple_subfile(); - let result = subfile.validate_local_subfile(); + fn test_validate_local_bundle() { + let mut bundle = simple_bundle(); + let result = bundle.validate_local_bundle(); assert!(result.is_ok()); // Add tests for failure cases - if let Some(file_meta) = subfile.chunk_files.first_mut() { - if let Some(first_hash) = file_meta.chunk_file.chunk_hashes.first_mut() { + if let Some(file_meta) = bundle.file_manifests.first_mut() { + if let Some(first_hash) = file_meta.file_manifest.chunk_hashes.first_mut() { *first_hash += "1"; } } - let result = subfile.validate_local_subfile(); + let result = bundle.validate_local_bundle(); assert!(result.is_err()); } } diff --git a/subfile-exchange/src/publisher/mod.rs b/file-exchange/src/publisher/mod.rs similarity index 61% rename from subfile-exchange/src/publisher/mod.rs rename to file-exchange/src/publisher/mod.rs index 8218a5f..43882c1 100644 --- a/subfile-exchange/src/publisher/mod.rs +++ b/file-exchange/src/publisher/mod.rs @@ -2,28 +2,28 @@ use serde_yaml::to_string; use crate::config::PublisherArgs; use crate::errors::Error; -use crate::subfile::local_file_system::Store; -use crate::subfile::{ +use crate::manifest::local_file_system::Store; +use crate::manifest::{ ipfs::{AddResponse, IpfsClient}, - BlockRange, ChunkFile, FileMetaInfo, SubfileManifest, + BlockRange, BundleManifest, FileManifest, FileMetaInfo, }; -pub struct SubfilePublisher { +pub struct ManifestPublisher { ipfs_client: IpfsClient, config: PublisherArgs, } -impl SubfilePublisher { +impl ManifestPublisher { pub fn new(ipfs_client: IpfsClient, config: PublisherArgs) -> Self { - SubfilePublisher { + ManifestPublisher { ipfs_client, config, } } - /// Takes file_path, create chunk_file, build merkle tree, publish, write to output + /// Takes file_path, create file_manifest, build merkle tree, publish, write to output pub async fn hash_and_publish_file(&self, file_name: &str) -> Result { - let yaml_str = self.write_chunk_file(file_name)?; + let yaml_str = 
self.write_file_manifest(file_name)?; let added: AddResponse = self .ipfs_client @@ -58,14 +58,14 @@ impl SubfilePublisher { Ok(root_hashes) } - pub fn construct_subfile_manifest( + pub fn construct_bundle_manifest( &self, file_meta_info: Vec, ) -> Result { - let manifest = SubfileManifest { + let manifest = BundleManifest { files: file_meta_info, file_type: self.config.file_type.clone(), - spec_version: self.config.file_version.clone(), + spec_version: self.config.bundle_version.clone(), description: self.config.description.clone(), chain_id: self.config.chain_id.clone(), block_range: BlockRange { @@ -77,7 +77,7 @@ impl SubfilePublisher { Ok(yaml) } - pub async fn publish_subfile_manifest(&self, manifest_yaml: &str) -> Result { + pub async fn publish_bundle_manifest(&self, manifest_yaml: &str) -> Result { let ipfs_hash = self .ipfs_client .add(manifest_yaml.as_bytes().to_vec()) @@ -95,11 +95,11 @@ impl SubfilePublisher { meta_info = tracing::field::debug(&meta_info), "hash_and_publish_files", ); - match self.construct_subfile_manifest(meta_info) { + match self.construct_bundle_manifest(meta_info) { Ok(manifest_yaml) => { - let ipfs_hash = self.publish_subfile_manifest(&manifest_yaml).await?; + let ipfs_hash = self.publish_bundle_manifest(&manifest_yaml).await?; tracing::info!( - "Published subfile manifest to IPFS with hash: {}", + "Published bundle manifest to IPFS with hash: {}", &ipfs_hash ); Ok(ipfs_hash) @@ -108,17 +108,18 @@ impl SubfilePublisher { } } - pub fn write_chunk_file(&self, file_name: &str) -> Result { - let chunk_file = ChunkFile::new(&self.config.read_dir, file_name, self.config.chunk_size)?; + pub fn write_file_manifest(&self, file_name: &str) -> Result { + let file_manifest = + FileManifest::new(&self.config.read_dir, file_name, self.config.chunk_size)?; // let merkle_tree = build_merkle_tree(chunks); - // let chunk_file = create_chunk_file(&merkle_tree); + // let file_manifest = create_file_manifest(&merkle_tree); tracing::trace!( - file = tracing::field::debug(&chunk_file), - "Created chunk file" + file = tracing::field::debug(&file_manifest), + "Created file manifest" ); - let yaml = to_string(&chunk_file).map_err(Error::YamlError)?; + let yaml = to_string(&file_manifest).map_err(Error::YamlError)?; // TODO: consider storing a local copy // let mut output_file = File::create(file_path)?; // output_file.write_all(yaml.as_bytes())?; @@ -126,18 +127,18 @@ impl SubfilePublisher { Ok(yaml) } - pub async fn object_store_write_chunk_file(&self, file_name: &str) -> Result { + pub async fn object_store_write_file_manifest(&self, file_name: &str) -> Result { let store = Store::new(&self.config.read_dir)?; - let chunk_file = store - .chunk_file(file_name, Some(self.config.chunk_size as usize)) + let file_manifest = store + .file_manifest(file_name, Some(self.config.chunk_size as usize)) .await?; tracing::trace!( - file = tracing::field::debug(&chunk_file), - "Created chunk file" + file = tracing::field::debug(&file_manifest), + "Created file manifest" ); - let yaml = to_string(&chunk_file).map_err(Error::YamlError)?; + let yaml = to_string(&file_manifest).map_err(Error::YamlError)?; Ok(yaml) } } @@ -147,21 +148,24 @@ mod tests { use super::*; #[tokio::test] - async fn test_write_chunk_file() { + async fn test_write_file_manifest() { let client = IpfsClient::localhost(); let args = PublisherArgs { read_dir: String::from("../example-file"), chunk_size: 1048576, ..Default::default() }; - let publisher = SubfilePublisher::new(client, args); + let publisher = 
ManifestPublisher::new(client, args); let name = "example-create-17686085.dbin"; // Hash and publish a single file - let chunk_file_yaml = publisher.write_chunk_file(name).unwrap(); - let chunk_file_yaml2 = publisher.object_store_write_chunk_file(name).await.unwrap(); + let file_manifest_yaml = publisher.write_file_manifest(name).unwrap(); + let file_manifest_yaml2 = publisher + .object_store_write_file_manifest(name) + .await + .unwrap(); - assert_eq!(chunk_file_yaml, chunk_file_yaml2); + assert_eq!(file_manifest_yaml, file_manifest_yaml2); } #[tokio::test] @@ -172,25 +176,22 @@ mod tests { read_dir: String::from("../example-file"), ..Default::default() }; - let builder = SubfilePublisher::new(client, args); + let builder = ManifestPublisher::new(client, args); let name = "example-create-17686085.dbin"; - // let chunks1 = chunk_file(Path::new(&path))?; + // let chunks1 = file_manifest(Path::new(&path))?; // Hash and publish a single file let hash = builder.hash_and_publish_file(name).await.unwrap().hash; - // Construct and publish a subfile manifest + // Construct and publish a bundle manifest let meta_info = vec![FileMetaInfo { name: name.to_string(), hash, }]; - if let Ok(manifest_yaml) = builder.construct_subfile_manifest(meta_info) { - if let Ok(ipfs_hash) = builder.publish_subfile_manifest(&manifest_yaml).await { - tracing::info!( - "Published subfile manifest to IPFS with hash: {}", - ipfs_hash - ); + if let Ok(manifest_yaml) = builder.construct_bundle_manifest(meta_info) { + if let Ok(ipfs_hash) = builder.publish_bundle_manifest(&manifest_yaml).await { + tracing::info!("Published bundle manifest to IPFS with hash: {}", ipfs_hash); } } } diff --git a/subfile-exchange/src/test_util.rs b/file-exchange/src/test_util.rs similarity index 92% rename from subfile-exchange/src/test_util.rs rename to file-exchange/src/test_util.rs index d44eacd..b827398 100644 --- a/subfile-exchange/src/test_util.rs +++ b/file-exchange/src/test_util.rs @@ -7,8 +7,8 @@ use std::time::Duration; use tempfile::NamedTempFile; use crate::config::init_tracing; -use crate::subfile::{ - BlockRange, ChunkFile, ChunkFileMeta, FileMetaInfo, Subfile, SubfileManifest, +use crate::manifest::{ + BlockRange, Bundle, BundleManifest, FileManifest, FileManifestMeta, FileMetaInfo, }; pub const CHUNK_SIZE: u64 = 1024 * 1024; // Define the chunk size, e.g., 1 MB @@ -48,8 +48,8 @@ pub fn modify_random_element(matrix: &mut [Vec]) -> Vec> { matrix.to_vec() } -pub fn simple_chunk_file() -> ChunkFile { - ChunkFile { +pub fn simple_file_manifest() -> FileManifest { + FileManifest { // file_name: "0017234600.dbin.zst".to_string(), total_bytes: 26359000, chunk_size: 1048576, @@ -85,15 +85,15 @@ pub fn simple_chunk_file() -> ChunkFile { } } -pub fn simple_subfile() -> Subfile { +pub fn simple_bundle() -> Bundle { let meta_info = FileMetaInfo { name: "0017234600.dbin.zst".to_string(), hash: "QmadNB1AQnap3czUime3gEETBNUj7HHzww6hVh5F6w7Boo".to_string(), }; - Subfile { + Bundle { ipfs_hash: "QmUqx9seQqAuCRi3uEPfa1rcS61rKhM7JxtraL81jvY6dZ".to_string(), local_path: "../example-file/".into(), - manifest: SubfileManifest { + manifest: BundleManifest { files: [meta_info.clone()].to_vec(), file_type: "flatfiles".to_string(), spec_version: "0.0.0".to_string(), @@ -104,16 +104,16 @@ pub fn simple_subfile() -> Subfile { end_block: None, }, }, - chunk_files: [ChunkFileMeta { + file_manifests: [FileManifestMeta { meta_info, - chunk_file: simple_chunk_file(), + file_manifest: simple_file_manifest(), }] .to_vec(), } } pub fn init_logger() { - 
env::set_var("RUST_LOG", "warn,subfile_exchange=trace"); + env::set_var("RUST_LOG", "warn,file_exchange=trace"); init_tracing(String::from("pretty")).unwrap(); } diff --git a/subfile-exchange/src/transaction_manager/mod.rs b/file-exchange/src/transaction_manager/mod.rs similarity index 100% rename from subfile-exchange/src/transaction_manager/mod.rs rename to file-exchange/src/transaction_manager/mod.rs diff --git a/subfile-exchange/src/transaction_manager/staking.rs b/file-exchange/src/transaction_manager/staking.rs similarity index 100% rename from subfile-exchange/src/transaction_manager/staking.rs rename to file-exchange/src/transaction_manager/staking.rs diff --git a/subfile-exchange/src/util.rs b/file-exchange/src/util.rs similarity index 100% rename from subfile-exchange/src/util.rs rename to file-exchange/src/util.rs diff --git a/subfile-exchange/tests/allocate.rs b/file-exchange/tests/allocate.rs similarity index 88% rename from subfile-exchange/tests/allocate.rs rename to file-exchange/tests/allocate.rs index a365006..c1af9cb 100644 --- a/subfile-exchange/tests/allocate.rs +++ b/file-exchange/tests/allocate.rs @@ -3,7 +3,7 @@ mod tests { use chrono::Utc; use ethers_core::types::U256; - use subfile_exchange::{ + use file_exchange::{ config::{AllocateArgs, OnchainAction, WalletArgs}, transaction_manager::TransactionManager, }; @@ -12,15 +12,15 @@ mod tests { #[ignore] async fn test_allocate() { // 1. Basic setup; const - std::env::set_var("RUST_LOG", "off,subfile_exchange=debug,allocate=trace"); - subfile_exchange::config::init_tracing(String::from("pretty")).unwrap(); + std::env::set_var("RUST_LOG", "off,file_exchange=debug,allocate=trace"); + file_exchange::config::init_tracing(String::from("pretty")).unwrap(); let wallet_args = WalletArgs { mnemonic: String::from( "sheriff obscure trick beauty army fat wink legal flee leader section suit", ), provider: String::from("https://arbitrum-sepolia.infura.io/v3/aaaaaaaaaaaaaaaaaaaa"), verifier: Some(String::from("0xfC24cE7a4428A6B89B52645243662A02BA734ECF")), - action: Some(subfile_exchange::config::OnchainAction::Allocate( + action: Some(file_exchange::config::OnchainAction::Allocate( AllocateArgs { tokens: U256::from(100), deployment_ipfs: String::from("QmeKabcCQBtgU6QjM3rp3w6pDHFW4r54ee89nGdhuyDuhi"), diff --git a/subfile-exchange/tests/discovery.rs b/file-exchange/tests/discovery.rs similarity index 50% rename from subfile-exchange/tests/discovery.rs rename to file-exchange/tests/discovery.rs index f339841..2eb400a 100644 --- a/subfile-exchange/tests/discovery.rs +++ b/file-exchange/tests/discovery.rs @@ -2,29 +2,29 @@ mod tests { use std::{process::Command, time::Duration}; - use subfile_exchange::{ - subfile::ipfs::IpfsClient, - subfile_finder::{unavailable_files, FileAvailbilityMap, IndexerEndpoint, SubfileFinder}, + use file_exchange::{ + discover::{unavailable_files, FileAvailbilityMap, Finder, IndexerEndpoint}, + manifest::ipfs::IpfsClient, test_util::server_ready, }; #[tokio::test] async fn test_discovery() { // 0. 
Basic setup; const - std::env::set_var("RUST_LOG", "off,subfile_exchange=debug,file_transfer=trace"); - subfile_exchange::config::init_tracing(String::from("pretty")).unwrap(); + std::env::set_var("RUST_LOG", "off,file_exchange=debug,file_transfer=trace"); + file_exchange::config::init_tracing(String::from("pretty")).unwrap(); let server_0 = "http://0.0.0.0:5677"; let server_1 = "http://0.0.0.0:5679"; - let chunk_file_hash_a = "QmeKabcCQBtgU6QjM3rp3w6pDHFW4r54ee89nGdhuyDuhi".to_string(); - let chunk_file_hash_b = "QmeE38uPSqT5XuHfM8X2JZAYgDCEwmDyMYULmZaRnNqPCj".to_string(); - let chunk_file_hash_c = "QmWs8dkshZ7abxFYQ3h9ie1Em7SqzAkwtVJXaBapwEWqR9".to_string(); + let file_manifest_hash_a = "QmeKabcCQBtgU6QjM3rp3w6pDHFW4r54ee89nGdhuyDuhi".to_string(); + let file_manifest_hash_b = "QmeE38uPSqT5XuHfM8X2JZAYgDCEwmDyMYULmZaRnNqPCj".to_string(); + let file_manifest_hash_c = "QmWs8dkshZ7abxFYQ3h9ie1Em7SqzAkwtVJXaBapwEWqR9".to_string(); - let subfile_hash_0 = "QmeaPp764FjQjPB66M9ijmQKmLhwBpHQhA7dEbH2FA1j3v".to_string(); // files: A,B,C - let subfile_hash_1 = "QmVPPWWaraEvoc4LCrYXtMbL13WPNbnuXV2yo7W8zexFGq".to_string(); // files: A - let subfile_hash_2 = "QmeD3dRVV6Gs84TRwiNj3tLt9mBEMVqy3GoWm7WN8oDzGz".to_string(); // files: B,C - let subfile_hash_3 = "QmTSwj1BGkkmVSnhw6uEGkcxGZvP5nq4pDhzHjwJvsQC2Z".to_string(); // files: B + let bundle_hash_0 = "QmeaPp764FjQjPB66M9ijmQKmLhwBpHQhA7dEbH2FA1j3v".to_string(); // files: A,B,C + let bundle_hash_1 = "QmVPPWWaraEvoc4LCrYXtMbL13WPNbnuXV2yo7W8zexFGq".to_string(); // files: A + let bundle_hash_2 = "QmeD3dRVV6Gs84TRwiNj3tLt9mBEMVqy3GoWm7WN8oDzGz".to_string(); // files: B,C + let bundle_hash_3 = "QmTSwj1BGkkmVSnhw6uEGkcxGZvP5nq4pDhzHjwJvsQC2Z".to_string(); // files: B let indexer_0: IndexerEndpoint = ( "0xead22a75679608952db6e85537fbfdca02dae9cb".to_string(), @@ -39,21 +39,21 @@ mod tests { let mut server_process_0 = Command::new("cargo") .arg("run") .arg("-p") - .arg("subfile-service") + .arg("file-service") .arg("--") .arg("--port") .arg("5677") .arg("--mnemonic") .arg("sheriff obscure trick beauty army fat wink legal flee leader section suit") - .arg("--subfiles") - .arg(format!("{}:./../example-file/", subfile_hash_0)) + .arg("--bundles") + .arg(format!("{}:./../example-file/", bundle_hash_0)) .spawn() .expect("Failed to start server"); let mut server_process_1 = Command::new("cargo") .arg("run") .arg("-p") - .arg("subfile-service") + .arg("file-service") .arg("--") .arg("--mnemonic") .arg("ice palace drill gadget biology glow tray equip heavy wolf toddler menu") @@ -61,10 +61,10 @@ mod tests { .arg("0.0.0.0") .arg("--port") .arg("5679") - .arg("--subfiles") + .arg("--bundles") .arg(format!( "{}:./../example-file/,{}:./../example-file/,{}:./../example-file/", - subfile_hash_1, subfile_hash_2, subfile_hash_3 + bundle_hash_1, bundle_hash_2, bundle_hash_3 )) .spawn() .expect("Failed to start server"); @@ -77,13 +77,13 @@ mod tests { // 2. Setup finder let client = IpfsClient::new("https://ipfs.network.thegraph.com") .expect("Could not create client to thegraph IPFS gateway"); - let finder = SubfileFinder::new(client); + let finder = Finder::new(client); - // 3. Find various combinations of subfiles - // 3.1 find subfile_0 with server 0 and 1, get server 0 + // 3. 
Find various combinations of bundles + // 3.1 find bundle_0 with server 0 and 1, get server 0 let endpoints = finder - .subfile_availabilities( - &subfile_hash_0, + .bundle_availabilities( + &bundle_hash_0, &[server_0.to_string(), server_1.to_string()], ) .await; @@ -91,10 +91,10 @@ mod tests { assert!(endpoints.first().unwrap().0 == "0xead22a75679608952db6e85537fbfdca02dae9cb"); assert!(endpoints.first().unwrap().1 == server_0); - // 3.2 find subfile_1 with server 0 and 1, get server 1 + // 3.2 find bundle_1 with server 0 and 1, get server 1 let endpoints = finder - .subfile_availabilities( - &subfile_hash_1, + .bundle_availabilities( + &bundle_hash_1, &[server_0.to_string(), server_1.to_string()], ) .await; @@ -102,91 +102,171 @@ mod tests { assert!(endpoints.first().unwrap().0 == "0x19804e50af1b72db4ce22a3c028e80c78d75af62"); assert!(endpoints.first().unwrap().1 == server_1); - // 3.3 find subfile_0 with sieved availability + // 3.3 find bundle_0 with sieved availability let map = finder .file_discovery( - &subfile_hash_0, + &bundle_hash_0, &[server_0.to_string(), server_1.to_string()], ) .await .unwrap(); assert!(map.lock().await.len() == 3); - assert!(matched(&map, &chunk_file_hash_a, &indexer_0, &vec![&subfile_hash_0]).await); - assert!(matched(&map, &chunk_file_hash_b, &indexer_0, &vec![&subfile_hash_0]).await); - assert!(matched(&map, &chunk_file_hash_c, &indexer_0, &vec![&subfile_hash_0]).await); - assert!(matched(&map, &chunk_file_hash_a, &indexer_1, &vec![&subfile_hash_1]).await); + assert!( + matched( + &map, + &file_manifest_hash_a, + &indexer_0, + &vec![&bundle_hash_0] + ) + .await + ); + assert!( + matched( + &map, + &file_manifest_hash_b, + &indexer_0, + &vec![&bundle_hash_0] + ) + .await + ); + assert!( + matched( + &map, + &file_manifest_hash_c, + &indexer_0, + &vec![&bundle_hash_0] + ) + .await + ); + assert!( + matched( + &map, + &file_manifest_hash_a, + &indexer_1, + &vec![&bundle_hash_1] + ) + .await + ); // update innermost vec to be a hashset to avoid ordering problem assert!( matched( &map, - &chunk_file_hash_b, + &file_manifest_hash_b, &indexer_1, - &vec![&subfile_hash_3, &subfile_hash_2] + &vec![&bundle_hash_3, &bundle_hash_2] ) .await || matched( &map, - &chunk_file_hash_b, + &file_manifest_hash_b, &indexer_1, - &vec![&subfile_hash_2, &subfile_hash_3] + &vec![&bundle_hash_2, &bundle_hash_3] ) .await ); - assert!(matched(&map, &chunk_file_hash_c, &indexer_1, &vec![&subfile_hash_2]).await); + assert!( + matched( + &map, + &file_manifest_hash_c, + &indexer_1, + &vec![&bundle_hash_2] + ) + .await + ); - // 3.4 find subfile_1 with sieved availability, get + // 3.4 find bundle_1 with sieved availability, get let map = finder .file_discovery( - &subfile_hash_1, + &bundle_hash_1, &[server_0.to_string(), server_1.to_string()], ) .await .unwrap(); assert!(map.lock().await.len() == 1); - assert!(matched(&map, &chunk_file_hash_a, &indexer_0, &vec![&subfile_hash_0]).await); - assert!(matched(&map, &chunk_file_hash_a, &indexer_1, &vec![&subfile_hash_1]).await); + assert!( + matched( + &map, + &file_manifest_hash_a, + &indexer_0, + &vec![&bundle_hash_0] + ) + .await + ); + assert!( + matched( + &map, + &file_manifest_hash_a, + &indexer_1, + &vec![&bundle_hash_1] + ) + .await + ); - // 3.5 find subfile_2 with sieved availability, get both 0 and 1 + // 3.5 find bundle_2 with sieved availability, get both 0 and 1 let map = finder .file_discovery( - &subfile_hash_2, + &bundle_hash_2, &[server_0.to_string(), server_1.to_string()], ) .await .unwrap(); assert!(map.lock().await.len() 
== 2); - assert!(matched(&map, &chunk_file_hash_b, &indexer_0, &vec![&subfile_hash_0]).await); - assert!(matched(&map, &chunk_file_hash_c, &indexer_0, &vec![&subfile_hash_0]).await); assert!( matched( &map, - &chunk_file_hash_b, + &file_manifest_hash_b, + &indexer_0, + &vec![&bundle_hash_0] + ) + .await + ); + assert!( + matched( + &map, + &file_manifest_hash_c, + &indexer_0, + &vec![&bundle_hash_0] + ) + .await + ); + assert!( + matched( + &map, + &file_manifest_hash_b, &indexer_1, - &vec![&subfile_hash_3, &subfile_hash_2] + &vec![&bundle_hash_3, &bundle_hash_2] ) .await || matched( &map, - &chunk_file_hash_b, + &file_manifest_hash_b, &indexer_1, - &vec![&subfile_hash_2, &subfile_hash_3] + &vec![&bundle_hash_2, &bundle_hash_3] ) .await ); - assert!(matched(&map, &chunk_file_hash_c, &indexer_1, &vec![&subfile_hash_2]).await); + assert!( + matched( + &map, + &file_manifest_hash_c, + &indexer_1, + &vec![&bundle_hash_2] + ) + .await + ); // 3.6 large files, not available on neither - let large_subfile_hash = "QmPexYQsJKyhL867xRaGS2kciNDwggCk7pgUxrNoPQSuPL"; // contains File A,B,C,D,E + let large_bundle_hash = "QmPexYQsJKyhL867xRaGS2kciNDwggCk7pgUxrNoPQSuPL"; // contains File A,B,C,D,E let endpoints = finder - .subfile_availabilities( - large_subfile_hash, + .bundle_availabilities( + large_bundle_hash, &[server_0.to_string(), server_1.to_string()], ) .await; assert!(endpoints.is_empty()); let map = finder .file_discovery( - large_subfile_hash, + large_bundle_hash, &[server_0.to_string(), server_1.to_string()], ) .await @@ -207,17 +287,17 @@ mod tests { async fn matched( file_map: &FileAvailbilityMap, - chunk_file: &str, + file_manifest: &str, endpoint: &IndexerEndpoint, - subfile_hashes: &Vec<&str>, + bundle_hashes: &Vec<&str>, ) -> bool { let map = file_map.lock().await; // Check if the key exists in the outer HashMap - let chunk_file_map = map.get(chunk_file).unwrap(); - let inner_map = chunk_file_map.lock().await; + let file_manifest_map = map.get(file_manifest).unwrap(); + let inner_map = file_manifest_map.lock().await; // Check if the endpoint exists in the inner HashMap - let subfiles = inner_map.get(endpoint).unwrap(); - subfile_hashes == subfiles + let bundles = inner_map.get(endpoint).unwrap(); + bundle_hashes == bundles } } diff --git a/subfile-exchange/tests/file_transfer.rs b/file-exchange/tests/file_transfer.rs similarity index 75% rename from subfile-exchange/tests/file_transfer.rs rename to file-exchange/tests/file_transfer.rs index 5595b72..eec6e49 100644 --- a/subfile-exchange/tests/file_transfer.rs +++ b/file-exchange/tests/file_transfer.rs @@ -4,30 +4,30 @@ mod tests { use tempfile::tempdir; use tokio::fs; - use subfile_exchange::{ - config::DownloaderArgs, subfile::ipfs::IpfsClient, subfile_client::SubfileDownloader, + use file_exchange::{ + config::DownloaderArgs, download_client::Downloader, manifest::ipfs::IpfsClient, test_util::server_ready, }; #[tokio::test] #[ignore = "Require working provider url"] async fn test_file_transfer() { - std::env::set_var("RUST_LOG", "off,subfile_exchange=debug,file_transfer=trace"); - subfile_exchange::config::init_tracing(String::from("pretty")).unwrap(); + std::env::set_var("RUST_LOG", "off,file_exchange=debug,file_transfer=trace"); + file_exchange::config::init_tracing(String::from("pretty")).unwrap(); let client = IpfsClient::new("https://ipfs.network.thegraph.com") .expect("Could not create client to thegraph IPFS gateway"); - let target_subfile = "QmeaPp764FjQjPB66M9ijmQKmLhwBpHQhA7dEbH2FA1j3v".to_string(); + let target_bundle = 
"QmeaPp764FjQjPB66M9ijmQKmLhwBpHQhA7dEbH2FA1j3v".to_string(); // 1. Setup server let mut server_process = Command::new("cargo") .arg("run") .arg("-p") - .arg("subfile-service") + .arg("file-service") .arg("--") .arg("--mnemonic") .arg("sheriff obscure trick beauty army fat wink legal flee leader section suit") - .arg("--subfiles") - .arg(format!("{}:./../example-file/", target_subfile)) + .arg("--bundles") + .arg(format!("{}:./../example-file/", target_bundle)) .spawn() .expect("Failed to start server"); tracing::debug!("Wait 10 seconds"); @@ -40,7 +40,7 @@ mod tests { let downloader_args = DownloaderArgs { output_dir: output_dir.to_str().unwrap().to_string(), - ipfs_hash: target_subfile, + ipfs_hash: target_bundle, indexer_endpoints: [ "http://localhost:5678".to_string(), "http://localhost:5677".to_string(), @@ -55,10 +55,10 @@ mod tests { ..Default::default() }; - let downloader = SubfileDownloader::new(client, downloader_args).await; + let downloader = Downloader::new(client, downloader_args).await; // 3. Perform the download - let download_result = downloader.download_subfile().await; + let download_result = downloader.download_bundle().await; // 4. Validate the download assert!(download_result.is_ok()); diff --git a/subfile-service/Cargo.toml b/file-service/Cargo.toml similarity index 92% rename from subfile-service/Cargo.toml rename to file-service/Cargo.toml index ad78230..dbd42d8 100644 --- a/subfile-service/Cargo.toml +++ b/file-service/Cargo.toml @@ -1,19 +1,19 @@ [package] -name = "subfile-service" +name = "file-service" version = "0.0.1" edition = "2021" authors = ["hopeyen "] rust-version = "1.72" -description = "Subfile data service" +description = "File hosting service" readme = "README.md" license = "Apache-2.0" [[bin]] -name = "subfile-service" +name = "file-service" path = "src/main.rs" [dependencies] -subfile-exchange = { path = "../subfile-exchange" } +file-exchange = { path = "../file-exchange" } alloy-sol-types = { version = "0.5.0", features = ["eip712-serde"] } alloy-primitives = { version = "0.5.0", features = ["serde"] } anyhow = "1.0" diff --git a/subfile-service/build.rs b/file-service/build.rs similarity index 100% rename from subfile-service/build.rs rename to file-service/build.rs diff --git a/subfile-service/src/config.rs b/file-service/src/config.rs similarity index 93% rename from subfile-service/src/config.rs rename to file-service/src/config.rs index cfd7bef..95148bf 100644 --- a/subfile-service/src/config.rs +++ b/file-service/src/config.rs @@ -9,8 +9,8 @@ use tracing_subscriber::FmtSubscriber; #[derive(Clone, Debug, Parser, Serialize, Deserialize)] #[command( - name = "subfile-service", - about = "Indexers subfile service", + name = "file-service", + about = "Indexer file hosting service", author = "hopeyen" )] #[command(author, version, about, long_about = None, arg_required_else_help = true)] @@ -54,7 +54,7 @@ pub struct ServerArgs { value_name = "HOST", default_value = "127.0.0.1", env = "HOST", - help = "Subfile server host" + help = "File server host" )] pub host: String, #[arg( @@ -62,18 +62,18 @@ pub struct ServerArgs { value_name = "PORT", default_value = "5678", env = "PORT", - help = "Subfile server port" + help = "File server port" )] pub port: usize, // Taking from config right now, later can read from DB table for managing server states #[arg( long, - value_name = "SUBFILES", - env = "SUBFILES", + value_name = "BUNDLES", + env = "BUNDLES", value_delimiter = ',', - help = "Comma separated list of IPFS hashes and local location of the 
subfiles to serve upon start-up; format: [ipfs_hash:local_path]"
+        help = "Comma separated list of IPFS hashes and local location of the bundles to serve upon start-up; format: [ipfs_hash:local_path]"
     )]
-    pub subfiles: Vec<String>,
+    pub bundles: Vec<String>,
     #[clap(
         long,
         value_name = "free-query-auth-token",
diff --git a/subfile-service/src/subfile_server/admin.rs b/file-service/src/file_server/admin.rs
similarity index 78%
rename from subfile-service/src/subfile_server/admin.rs
rename to file-service/src/file_server/admin.rs
index cb888b2..1ffe0f8 100644
--- a/subfile-service/src/subfile_server/admin.rs
+++ b/file-service/src/file_server/admin.rs
@@ -1,11 +1,11 @@
+use file_exchange::errors::ServerError;
+use file_exchange::{
+    errors::Error,
+    manifest::{ipfs::is_valid_ipfs_hash, manifest_fetcher::read_bundle, validate_bundle_entries},
+};
 use hyper::body::to_bytes;
 use hyper::{Body, Request, Response, StatusCode};
 use serde_json::{json, Value};
-use subfile_exchange::errors::ServerError;
-use subfile_exchange::{
-    errors::Error,
-    subfile::{ipfs::is_valid_ipfs_hash, subfile_reader::read_subfile, validate_subfile_entries},
-};
 use super::{create_error_response, ServerContext};
@@ -48,13 +48,13 @@ pub async fn handle_admin_request(
     tracing::debug!(
         method = tracing::field::debug(&method),
         params = tracing::field::debug(&params),
-        "Received valid/authorized subfiles management request"
+        "Received valid/authorized bundles management request"
     );
     match method.as_str() {
-        "get_subfiles" => get_subfiles(context).await,
-        "add_subfile" => add_subfile(params, context).await,
-        "remove_subfile" => remove_subfile(params, context).await,
+        "get_bundles" => get_bundles(context).await,
+        "add_bundle" => add_bundle(params, context).await,
+        "remove_bundle" => remove_bundle(params, context).await,
         "update_price_per_byte" => update_price_per_byte(params, context).await,
         _ => Ok(hyper::Response::builder()
             .status(hyper::StatusCode::METHOD_NOT_ALLOWED)
@@ -80,18 +80,18 @@ async fn parse_admin_request(req: Request<Body>) -> Result<(String, Option<Value>), Error> {
     Ok((method.to_string(), params.cloned()))
 }
 //TODO: rich the details
-/// Function to retrieve all subfiles and their details
-async fn get_subfiles(context: &ServerContext) -> Result<Response<Body>, Error> {
+/// Function to retrieve all bundles and their details
+async fn get_bundles(context: &ServerContext) -> Result<Response<Body>, Error> {
     let server_state = context.lock().await;
-    // Create a JSON object or array containing the subfiles' details
-    let subfiles_info = server_state
-        .subfiles
+    // Create a JSON object or array containing the bundles' details
+    let bundles_info = server_state
+        .bundles
         .iter()
-        .map(|(ipfs_hash, subfile)| json!({ "ipfs_hash": ipfs_hash, "subfile": subfile }))
+        .map(|(ipfs_hash, bundle)| json!({ "ipfs_hash": ipfs_hash, "bundle": bundle }))
         .collect::<Vec<_>>();
     drop(server_state);
-    let body = match serde_json::to_string(&subfiles_info).map_err(Error::JsonError) {
+    let body = match serde_json::to_string(&bundles_info).map_err(Error::JsonError) {
         Ok(b) => b,
         Err(e) => {
             return Ok(create_error_response(
@@ -100,7 +100,7 @@ async fn get_subfiles(context: &ServerContext) -> Result<Response<Body>, Error>
             ))
         }
     };
-    tracing::trace!("Built get_subfile response");
+    tracing::trace!("Built get_bundle response");
     Ok(Response::builder()
         .status(StatusCode::OK)
@@ -108,8 +108,8 @@ async fn get_subfiles(context: &ServerContext) -> Result<Response<Body>, Error>
         .unwrap())
 }
-/// Add a subfile to the server state
-async fn add_subfile(
+/// Add a bundle to the server state
+async fn add_bundle(
     params: Option<Value>,
     context: &ServerContext,
 ) -> Result<Response<Body>, Error> {
@@ -126,7 +126,7 @@ async fn add_subfile(
     let entries: Vec<String> = serde_json::from_value(params).map_err(Error::JsonError)?;
     // Validate before adding to the server state
-    let subfile_entries = match validate_subfile_entries(entries) {
+    let bundle_entries = match validate_bundle_entries(entries) {
         Ok(s) => s,
         Err(e) => {
             return Ok(create_error_response(
@@ -136,8 +136,8 @@ async fn add_subfile(
         }
     };
     let mut server_state = context.lock().await;
-    for (ipfs_hash, local_path) in subfile_entries {
-        let subfile = match read_subfile(&server_state.client, &ipfs_hash, local_path).await {
+    for (ipfs_hash, local_path) in bundle_entries {
+        let bundle = match read_bundle(&server_state.client, &ipfs_hash, local_path).await {
             Ok(s) => s,
             Err(e) => {
                 return Ok(create_error_response(
@@ -146,7 +146,7 @@ async fn add_subfile(
                 ))
             }
         };
-        if let Err(e) = subfile.validate_local_subfile() {
+        if let Err(e) = bundle.validate_local_bundle() {
             return Ok(create_error_response(
                 &e.to_string(),
                 StatusCode::BAD_REQUEST,
@@ -154,18 +154,18 @@ async fn add_subfile(
         };
         server_state
-            .subfiles
-            .insert(subfile.ipfs_hash.clone(), subfile);
+            .bundles
+            .insert(bundle.ipfs_hash.clone(), bundle);
     }
     Ok(Response::builder()
         .status(StatusCode::OK)
-        .body("Subfile(s) added successfully".into())
+        .body("Bundle(s) added successfully".into())
         .unwrap())
 }
-/// Remove a subfile from the server state
-async fn remove_subfile(
+/// Remove a bundle from the server state
+async fn remove_bundle(
     params: Option<Value>,
     context: &ServerContext,
 ) -> Result<Response<Body>, Error> {
@@ -203,14 +203,14 @@ async fn remove_subfile(
     // Access the server state
     let mut server_state = context.lock().await;
-    // Remove the valid IPFS hashes from the server state's subfiles
+    // Remove the valid IPFS hashes from the server state's bundles
     for ipfs_hash in ipfs_hashes {
-        server_state.subfiles.remove(&ipfs_hash);
+        server_state.bundles.remove(&ipfs_hash);
     }
     Ok(Response::builder()
         .status(StatusCode::OK)
-        .body("Subfile(s) removed successfully".into())
+        .body("Bundle(s) removed successfully".into())
         .unwrap())
 }
@@ -242,7 +242,7 @@ async fn update_price_per_byte(
     // Access the server state
     let mut server_state = context.lock().await;
-    // Remove the valid IPFS hashes from the server state's subfiles
+    // Remove the valid IPFS hashes from the server state's bundles
     server_state.price_per_byte = new_price;
     Ok(Response::builder()
diff --git a/subfile-service/src/subfile_server/mod.rs b/file-service/src/file_server/mod.rs
similarity index 81%
rename from subfile-service/src/subfile_server/mod.rs
rename to file-service/src/file_server/mod.rs
index 8dfaf48..1d1c617 100644
--- a/subfile-service/src/subfile_server/mod.rs
+++ b/file-service/src/file_server/mod.rs
@@ -6,13 +6,13 @@ use std::collections::HashMap;
 use std::sync::Arc;
 use tokio::sync::Mutex;
-use subfile_exchange::errors::Error;
-use subfile_exchange::subfile::{
-    ipfs::IpfsClient, subfile_reader::read_subfile, validate_subfile_entries, Subfile,
+use file_exchange::errors::Error;
+use file_exchange::manifest::{
+    ipfs::IpfsClient, manifest_fetcher::read_bundle, validate_bundle_entries, Bundle,
 };
 use crate::config::{Config, ServerArgs};
-use crate::subfile_server::{admin::handle_admin_request, util::public_key};
+use crate::file_server::{admin::handle_admin_request, util::public_key};
 // #![cfg(feature = "acceptor")]
 // use hyper_rustls::TlsAcceptor;
 use hyper::{Body, Request, Response, StatusCode};
@@ -27,7 +27,7 @@ pub mod util;
 pub struct ServerState {
     pub client: IpfsClient,
     pub operator_public_key: String,
-    pub subfiles: HashMap<String, Subfile>, // Keyed by IPFS hash
+    pub bundles: HashMap<String, Bundle>, // Keyed by IPFS hash
     pub release: util::PackageVersion,
     pub free_query_auth_token: Option<String>, // Add bearer prefix
     pub admin_auth_token: Option<String>, // Add bearer prefix
@@ -50,9 +50,9 @@ pub async fn init_server(config: Config) {
         .parse()
         .expect("Invalid address");
-    let state = initialize_subfile_server_context(&client, config)
+    let state = initialize_server_context(&client, config)
         .await
-        .expect("Failed to initiate subfile server");
+        .expect("Failed to initiate bundle server");
     // Create hyper server routes
     let make_svc = make_service_fn(|_| {
@@ -82,8 +82,8 @@ pub async fn init_server(config: Config) {
     }
 }
-/// Function to initialize the subfile server
-async fn initialize_subfile_server_context(
+/// Function to initialize the file hosting server
+async fn initialize_server_context(
     client: &IpfsClient,
     config: ServerArgs,
 ) -> Result<ServerContext, Error> {
@@ -92,10 +92,10 @@ async fn initialize_subfile_server_context(
         "Initializing server context"
     );
-    let subfile_entries = validate_subfile_entries(config.subfiles.clone())?;
+    let bundle_entries = validate_bundle_entries(config.bundles.clone())?;
     tracing::debug!(
-        entries = tracing::field::debug(&subfile_entries),
-        "Validated subfile entries"
+        entries = tracing::field::debug(&bundle_entries),
+        "Validated bundle entries"
     );
     let free_query_auth_token = config
@@ -110,7 +110,7 @@ async fn initialize_subfile_server_context(
     // This would be part of your server state initialization
     let mut server_state = ServerState {
         client: client.clone(),
-        subfiles: HashMap::new(),
+        bundles: HashMap::new(),
         release: util::PackageVersion::from(build_info()),
         free_query_auth_token,
         admin_auth_token,
@@ -120,13 +120,13 @@ async fn initialize_subfile_server_context(
     };
     // Fetch the file using IPFS client
-    for (ipfs_hash, local_path) in subfile_entries {
-        let subfile = read_subfile(&server_state.client, &ipfs_hash, local_path).await?;
-        let _ = subfile.validate_local_subfile();
+    for (ipfs_hash, local_path) in bundle_entries {
+        let bundle = read_bundle(&server_state.client, &ipfs_hash, local_path).await?;
+        let _ = bundle.validate_local_bundle();
         server_state
-            .subfiles
-            .insert(subfile.ipfs_hash.clone(), subfile);
+            .bundles
+            .insert(bundle.ipfs_hash.clone(), bundle);
     }
     // Return the server state wrapped in an Arc for thread safety
@@ -151,7 +151,7 @@ pub async fn handle_request(
         "/cost" => routes::cost(&context).await,
         "/admin" => handle_admin_request(req, &context).await,
         //TODO: consider routing through file level IPFS
-        path if path.starts_with("/subfiles/id/") => {
+        path if path.starts_with("/bundles/id/") => {
             routes::file_service(path, &req, &context).await
         }
         _ => Ok(Response::builder()
diff --git a/subfile-service/src/subfile_server/range.rs b/file-service/src/file_server/range.rs
similarity index 97%
rename from subfile-service/src/subfile_server/range.rs
rename to file-service/src/file_server/range.rs
index 33f3f0b..af95564 100644
--- a/subfile-service/src/subfile_server/range.rs
+++ b/file-service/src/file_server/range.rs
@@ -7,7 +7,7 @@ use std::io::{Read, Seek, SeekFrom};
 use std::path::Path;
-use subfile_exchange::errors::{Error, ServerError};
+use file_exchange::errors::{Error, ServerError};
 // Function to parse the Range header and return the start and end bytes
 pub fn parse_range_header(range_header: &hyper::header::HeaderValue) -> Result<(u64, u64), Error> {
@@ -48,7 +48,7 @@ pub async fn serve_file_range(
         end_byte = tracing::field::debug(&end),
         "Serve file range"
     );
-    //TODO: Map the subfile_id to a file path, use server state for the file_map
+    //TODO: Map the manifest_id to a file path, use server state for the file_map
     let mut file = match File::open(file_path) {
         Ok(f) => f,
         Err(e) => {
diff --git a/subfile-service/src/subfile_server/routes.rs b/file-service/src/file_server/routes.rs
similarity index 83%
rename from subfile-service/src/subfile_server/routes.rs
rename to file-service/src/file_server/routes.rs
index 4d6f419..922dbca 100644
--- a/subfile-service/src/subfile_server/routes.rs
+++ b/file-service/src/file_server/routes.rs
@@ -1,9 +1,9 @@
 // #![cfg(feature = "acceptor")]
 use http::header::CONTENT_RANGE;
-use subfile_exchange::errors::{Error, ServerError};
+use file_exchange::errors::{Error, ServerError};
-use crate::subfile_server::util::{Health, Operator};
+use crate::file_server::util::{Health, Operator};
 // #![cfg(feature = "acceptor")]
 // use hyper_rustls::TlsAcceptor;
 use hyper::{Body, Request, Response, StatusCode};
@@ -43,12 +43,12 @@ pub async fn cost(context: &ServerContext) -> Result<Response<Body>, Error> {
 /// Endpoint for status availability
 pub async fn status(context: &ServerContext) -> Result<Response<Body>, Error> {
-    let subfile_mapping = context.lock().await.subfiles.clone();
-    let subfile_ipfses: Vec<String> = subfile_mapping
+    let bundle_mapping = context.lock().await.bundles.clone();
+    let bundle_ipfses: Vec<String> = bundle_mapping
         .keys()
         .map(|i| i.to_owned())
         .collect::<Vec<String>>();
-    let json = serde_json::to_string(&subfile_ipfses).map_err(Error::JsonError)?;
+    let json = serde_json::to_string(&bundle_ipfses).map_err(Error::JsonError)?;
     tracing::debug!(json, "Serving status");
     Response::builder()
@@ -76,11 +76,11 @@ pub async fn file_service(
     context: &ServerContext,
 ) -> Result<Response<Body>, Error> {
     tracing::debug!("Received file range request");
-    let id = path.trim_start_matches("/subfiles/id/");
+    let id = path.trim_start_matches("/bundles/id/");
     let context_ref = context.lock().await;
     tracing::debug!(
-        subfiles = tracing::field::debug(&context_ref),
+        bundles = tracing::field::debug(&context_ref),
         id,
         "Received file range request"
     );
@@ -104,26 +104,26 @@ pub async fn file_service(
             .unwrap());
     }
-    let requested_subfile = match context_ref.subfiles.get(id) {
+    let requested_bundle = match context_ref.bundles.get(id) {
         Some(s) => s.clone(),
         None => {
             tracing::debug!(
                 server_context = tracing::field::debug(&context_ref),
                 id,
-                "Requested subfile is not served locally"
+                "Requested bundle is not served locally"
             );
             return Ok(Response::builder()
                 .status(StatusCode::NOT_FOUND)
-                .body("Subfile not found".into())
+                .body("Bundle not found".into())
                 .unwrap());
         }
     };
     match req.headers().get("file_hash") {
         Some(hash) if hash.to_str().is_ok() => {
-            let mut file_path = requested_subfile.local_path.clone();
-            let chunk_file = match requested_subfile
-                .chunk_files
+            let mut file_path = requested_bundle.local_path.clone();
+            let file_manifest = match requested_bundle
+                .file_manifests
                 .iter()
                 .find(|file| file.meta_info.hash == hash.to_str().unwrap())
             {
@@ -131,11 +131,11 @@ pub async fn file_service(
                 None => {
                     return Ok(Response::builder()
                         .status(StatusCode::NOT_FOUND)
-                        .body("Chunk file not found".into())
+                        .body("File manifest not found".into())
                         .unwrap())
                 }
             };
-            file_path.push(chunk_file.meta_info.name.clone());
+            file_path.push(file_manifest.meta_info.name.clone());
             // Parse the range header to get the start and end bytes
             match req.headers().get(CONTENT_RANGE) {
                 Some(r) => {
@@ -152,7 +152,7 @@ pub async fn file_service(
         }
         _ =>
Ok(Response::builder() .status(StatusCode::NOT_ACCEPTABLE) - .body("Missing required chunk_file_hash header".into()) + .body("Missing required file_manifest_hash header".into()) .unwrap()), } } diff --git a/subfile-service/src/subfile_server/util.rs b/file-service/src/file_server/util.rs similarity index 96% rename from subfile-service/src/subfile_server/util.rs rename to file-service/src/file_server/util.rs index 410f04a..6ff9c2e 100644 --- a/subfile-service/src/subfile_server/util.rs +++ b/file-service/src/file_server/util.rs @@ -1,11 +1,12 @@ use build_info::BuildInfo; +use file_exchange::{ + errors::{Error, ServerError}, + util::{build_wallet, wallet_address}, +}; use serde::{Deserialize, Serialize}; use std::fs; use std::{collections::HashMap, io}; -use subfile_exchange::util::{build_wallet, wallet_address}; - -use subfile_exchange::errors::{Error, ServerError}; #[derive(Serialize, Deserialize)] pub struct Health { diff --git a/file-service/src/lib.rs b/file-service/src/lib.rs new file mode 100644 index 0000000..8978ee2 --- /dev/null +++ b/file-service/src/lib.rs @@ -0,0 +1,2 @@ +pub mod config; +pub mod file_server; diff --git a/subfile-service/src/main.rs b/file-service/src/main.rs similarity index 69% rename from subfile-service/src/main.rs rename to file-service/src/main.rs index 3ac5984..870e7de 100644 --- a/subfile-service/src/main.rs +++ b/file-service/src/main.rs @@ -1,6 +1,6 @@ use dotenv::dotenv; -use subfile_service::{config::Config, subfile_server::init_server}; +use file_service::{config::Config, file_server::init_server}; #[tokio::main] async fn main() { diff --git a/scripts/release.sh b/scripts/file-exchange-release.sh similarity index 55% rename from scripts/release.sh rename to scripts/file-exchange-release.sh index 16eaa3c..e96e319 100755 --- a/scripts/release.sh +++ b/scripts/file-exchange-release.sh @@ -3,7 +3,7 @@ set -e set -x -VERSION="v$(cargo metadata --quiet --format-version 1 | jq -r '.packages[] | select(.name == "subfile-exchange") | .version')" +VERSION="v$(cargo metadata --quiet --format-version 1 | jq -r '.packages[] | select(.name == "file-exchange") | .version')" if [[ -z "$VERSION" ]]; then echo "Usage: $0 " @@ -13,8 +13,8 @@ fi git-cliff -o CHANGELOG.md ( - git add CHANGELOG.md Cargo.lock Cargo.toml scripts/release.sh \ - && git commit -m "chore: release $VERSION" + git add CHANGELOG.md Cargo.lock Cargo.toml scripts/file-exchange-release.sh \ + && git commit -m "chore: release file-exchange $VERSION" ) || true # Publish to crates.io diff --git a/subfile-exchange/benches/new_chunk_file.rs b/subfile-exchange/benches/new_chunk_file.rs deleted file mode 100644 index 805a5d2..0000000 --- a/subfile-exchange/benches/new_chunk_file.rs +++ /dev/null @@ -1,35 +0,0 @@ -use criterion::async_executor::FuturesExecutor; -use criterion::{black_box, criterion_group, criterion_main, Criterion}; - -use subfile_exchange::{ - subfile::{local_file_system::Store, ChunkFile}, - test_util::CHUNK_SIZE, -}; -fn new_chunk_file_benchmark_file_store(c: &mut Criterion) { - // ChunkFile::new(&self.config.read_dir, file_name, self.config.chunk_size) - let read_dir = black_box("../example-file"); - let file_name = black_box("0017234600.dbin.zst"); - let file_size = black_box(CHUNK_SIZE); - - c.bench_function("new_chunk_file_benchmark_file_store", |b| { - b.iter(|| ChunkFile::new(read_dir, file_name, file_size).unwrap()) - }); -} - -fn new_chunk_file_benchmark_object_store(c: &mut Criterion) { - let store = black_box(Store::new("../example-file").unwrap()); - let file_name = 
black_box("0017234600.dbin.zst"); - let file_size = black_box(Some(CHUNK_SIZE as usize)); - - c.bench_function("new_chunk_file_benchmark_object_store", |b| { - b.to_async(FuturesExecutor) - .iter(|| store.chunk_file(file_name, file_size)) - }); -} - -criterion_group!( - benches, - new_chunk_file_benchmark_file_store, - new_chunk_file_benchmark_object_store -); -criterion_main!(benches); diff --git a/subfile-exchange/benches/validate_local_subfile.rs b/subfile-exchange/benches/validate_local_subfile.rs deleted file mode 100644 index 0abf736..0000000 --- a/subfile-exchange/benches/validate_local_subfile.rs +++ /dev/null @@ -1,15 +0,0 @@ -use criterion::black_box; -use criterion::Criterion; - -use criterion::{criterion_group, criterion_main}; -use subfile_exchange::test_util::simple_subfile; - -fn validate_local_subfile_benchmark(c: &mut Criterion) { - let subfile = black_box(simple_subfile()); - c.bench_function("validate_local_subfile", |b| { - b.iter(|| subfile.validate_local_subfile()) - }); -} - -criterion_group!(benches, validate_local_subfile_benchmark); -criterion_main!(benches); diff --git a/subfile-service/src/lib.rs b/subfile-service/src/lib.rs deleted file mode 100644 index 5ee3458..0000000 --- a/subfile-service/src/lib.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod config; -pub mod subfile_server;