From 01564e42c6b1e3082a1938f332bf68404a22e983 Mon Sep 17 00:00:00 2001 From: Pranshu Maheshwari Date: Thu, 5 Dec 2024 12:25:47 -0500 Subject: [PATCH 001/114] Initial pipelines docs --- src/content/changelogs/pipelines.yaml | 11 + .../docs/pipelines/configuration/batching.mdx | 28 + .../docs/pipelines/configuration/index.mdx | 12 + .../configuration/partition-filenames.mdx | 30 + src/content/docs/pipelines/examples/index.mdx | 12 + src/content/docs/pipelines/get-started.mdx | 96 + src/content/docs/pipelines/index.mdx | 53 + .../docs/pipelines/observability/index.mdx | 12 + .../docs/pipelines/observability/metrics.mdx | 66 + src/content/docs/pipelines/pipelines-api.mdx | 7 + .../docs/pipelines/reference/changelog.mdx | 15 + .../docs/pipelines/reference/index.mdx | 12 + .../docs/pipelines/reference/limits.mdx | 23 + .../docs/pipelines/reference/pricing.mdx | 11 + .../pipelines/reference/wrangler-commands.mdx | 8 + src/content/docs/pipelines/sources/http.mdx | 60 + src/content/docs/pipelines/sources/index.mdx | 12 + .../docs/pipelines/sources/worker-bindings.md | 112 + .../docs/workers/wrangler/commands.mdx | 2776 +++++++++++++---- src/content/products/pipelines.yaml | 12 + src/icons/pipelines.svg | 1 + 21 files changed, 2809 insertions(+), 560 deletions(-) create mode 100644 src/content/changelogs/pipelines.yaml create mode 100644 src/content/docs/pipelines/configuration/batching.mdx create mode 100644 src/content/docs/pipelines/configuration/index.mdx create mode 100644 src/content/docs/pipelines/configuration/partition-filenames.mdx create mode 100644 src/content/docs/pipelines/examples/index.mdx create mode 100644 src/content/docs/pipelines/get-started.mdx create mode 100644 src/content/docs/pipelines/index.mdx create mode 100644 src/content/docs/pipelines/observability/index.mdx create mode 100644 src/content/docs/pipelines/observability/metrics.mdx create mode 100644 src/content/docs/pipelines/pipelines-api.mdx create mode 100644 
src/content/docs/pipelines/reference/changelog.mdx create mode 100644 src/content/docs/pipelines/reference/index.mdx create mode 100644 src/content/docs/pipelines/reference/limits.mdx create mode 100644 src/content/docs/pipelines/reference/pricing.mdx create mode 100644 src/content/docs/pipelines/reference/wrangler-commands.mdx create mode 100644 src/content/docs/pipelines/sources/http.mdx create mode 100644 src/content/docs/pipelines/sources/index.mdx create mode 100644 src/content/docs/pipelines/sources/worker-bindings.md create mode 100644 src/content/products/pipelines.yaml create mode 100644 src/icons/pipelines.svg diff --git a/src/content/changelogs/pipelines.yaml b/src/content/changelogs/pipelines.yaml new file mode 100644 index 000000000000000..fff3054f92f7be2 --- /dev/null +++ b/src/content/changelogs/pipelines.yaml @@ -0,0 +1,11 @@ +--- +link: "/pipelines/reference/changelog/" +productName: Pipelines +productLink: "/pipelines/" +productArea: Developer Platform +productAreaLink: "/pipelines/" +entries: + - publish_date: "2024-09-24" + title: Pipelines is now in public beta. + description: |- + Pipelines, a new product to ingest and store real time streaming data, is now in public beta. The public beta is available to any user with a [free or paid Workers plan](/workers/platform/pricing/). Create a Pipeline, and you'll be able to post data to it via HTTP or from a Cloudflare Worker. Pipelines handle batching, buffering, and partitioning the data, before writing it to an R2 bucket of your choice. It's useful to collect clickstream data, or ingest logs from a service. Start building with our [get started guide](/pipelines/get-started/). 
diff --git a/src/content/docs/pipelines/configuration/batching.mdx b/src/content/docs/pipelines/configuration/batching.mdx new file mode 100644 index 000000000000000..1815ca47db7f43d --- /dev/null +++ b/src/content/docs/pipelines/configuration/batching.mdx @@ -0,0 +1,28 @@ +--- +pcx_content_type: concept +title: Batching +sidebar: + order: 10 +--- + +Pipelines automatically batches requests that are received via HTTP or from a Worker. Batching helps reduce the number of output files written to your destination, which can make them more efficient to query. + +There are three ways to define how requests are batched: + +1. `batch-max-mb`: The maximum amount of data that will be batched, in megabytes. Default is 10 MB, maximum is 100 MB. +2. `batch-max-rows`: The maximum number of rows or events in a batch before data is written. Default, and maximum, is 10,000 rows. +3. `batch-max-seconds`: The maximum duration of a batch before data is written, in seconds. Default is 15 seconds, maximum is 600 seconds. + +All three batch definitions work together. Whichever limit is reached first triggers the delivery of a batch. + +For example, a `batch-max-mb` = 100 MB and a `batch-max-seconds` = 600 means that if 100 MB of events are posted to the Pipeline, the batch will be delivered. However, if it takes longer than 600 seconds for 100 MB of events to be posted, a batch of all the messages that were posted during those 600 seconds will be created and delivered. 
+ +## Batch settings + +You can configure the following batch-level settings to adjust how Pipelines create a batch: + +| Setting | Default | Minimum | Maximum | +| ----------------------------------------- | ----------- | --------- | ----------- | +| Maximum Batch Size `batch-max-mb` | 10 MB | 0.001 MB | 100 MB | +| Maximum Batch Timeout `batch-max-seconds` | 15 seconds | 0 seconds | 600 seconds | +| Maximum Batch Rows `batch-max-rows` | 10,000 rows | 1 row | 10,000 rows | diff --git a/src/content/docs/pipelines/configuration/index.mdx b/src/content/docs/pipelines/configuration/index.mdx new file mode 100644 index 000000000000000..06fe350e080ded6 --- /dev/null +++ b/src/content/docs/pipelines/configuration/index.mdx @@ -0,0 +1,12 @@ +--- +title: Configuration +pcx_content_type: navigation +sidebar: + order: 4 + group: + hideIndex: true +--- + +import { DirectoryListing } from "~/components" + + \ No newline at end of file diff --git a/src/content/docs/pipelines/configuration/partition-filenames.mdx b/src/content/docs/pipelines/configuration/partition-filenames.mdx new file mode 100644 index 000000000000000..3fb187f20cbba6d --- /dev/null +++ b/src/content/docs/pipelines/configuration/partition-filenames.mdx @@ -0,0 +1,30 @@ +--- +pcx_content_type: concept +title: Partitions, Filenames and Filepaths +sidebar: + order: 11 + +--- + +## Partitions +Partitioning organizes data into directories based on specific fields to improve query performance. It helps by reducing the amount of data scanned for queries, enabling faster reads. By default, Pipelines partitions data by event date. This will be customizable in the future. 
+ +For example, the output from a Pipeline in your R2 bucket might look like this: +```sh +- event_date=2024-09-06/hr=15/37db9289-15ba-4e8b-9231-538dc7c72c1e-15.json.gz +- event_date=2024-09-06/hr=15/37db9289-15ba-4e8b-9231-538dc7c72c1e-15.json.gz +``` + +## Filepath +Customizing the filepath allows you to store data with a specific prefix inside your specified R2 bucket. The data will remain partitioned by date. + +To modify the prefix for a Pipeline using Wrangler: +```sh +wrangler pipelines update --filepath "test" +``` + +All the output records generated by your pipeline will be stored under the prefix "test", and will look like this: +```sh +- test/event_date=2024-09-06/hr=15/37db9289-15ba-4e8b-9231-538dc7c72c1e-15.json.gz +- test/event_date=2024-09-06/hr=15/37db9289-15ba-4e8b-9231-538dc7c72c1e-15.json.gz +``` diff --git a/src/content/docs/pipelines/examples/index.mdx b/src/content/docs/pipelines/examples/index.mdx new file mode 100644 index 000000000000000..f92017ab10cd4b4 --- /dev/null +++ b/src/content/docs/pipelines/examples/index.mdx @@ -0,0 +1,12 @@ +--- +title: Examples +pcx_content_type: navigation +sidebar: + order: 4 + group: + hideIndex: false +--- + +import { DirectoryListing } from "~/components" + + \ No newline at end of file diff --git a/src/content/docs/pipelines/get-started.mdx b/src/content/docs/pipelines/get-started.mdx new file mode 100644 index 000000000000000..4dc48e79a2d984d --- /dev/null +++ b/src/content/docs/pipelines/get-started.mdx @@ -0,0 +1,96 @@ +--- +title: Get started +pcx_content_type: get-started +sidebar: + order: 2 +head: + - tag: title + content: Get started +--- + +import { Render, PackageManagers } from "~/components"; + +Pipelines let you ingest real-time data streams, such as click events on a website, or logs from a service. You can send data to a Pipeline from a Worker, or via HTTP. Pipelines handle batching requests and scales in response to your workload. 
Finally, Pipelines deliver the output into R2 as JSON files, automatically handling partitioning and compression for efficient querying. + +By following this guide, you will: + +1. Create your first Pipeline. +2. Connect it to your R2 bucket. +3. Post data to it via HTTP. +4. Verify the output file written to R2. + +:::note + +Pipelines is in **public beta**, and any developer with a [paid Workers plan](/workers/platform/pricing/#workers) can start using Pipelines immediately. + +::: + +## Prerequisites + +To use Pipelines, you will need: + + + +## 1. Set up an R2 bucket and get your API tokens. + +Pipelines are built to ingest data and store it in an R2 bucket. Create a bucket by following the [get started guide for R2](/r2/get-started/). Save the bucket name for the next step. + +Secondly, make sure to get your R2 API tokens to use in the next step. Follow the guide on obtaining R2 API tokens in the [R2 API Tokens Guide](/r2/api/s3/tokens/), and make sure to save your Secret Access Key and Access Key IDs. + +## 2. Create a Pipeline + +To create a Pipeline using Wrangler, run the following command in the terminal, and specify: + +- The name of your Pipeline +- The name of the R2 bucket you created in step 1 +- The R2 API credentials from Step 1 + +```sh +npx wrangler pipelines create [PIPELINE-NAME] --r2 [R2-BUCKET-NAME] --access-key-id [ACCESS-KEY-ID] --secret-access-key [SECRET-ACCESS-KEY] +``` + +When choosing a name for your Pipeline: + +1. Ensure it is descriptive and relevant to the type of events you intend to ingest. You cannot change the name of the Pipeline after creating it. +2. Pipeline names must be between 1 and 63 characters long. +3. The name cannot contain special characters outside dashes (`-`). +4. The name must start and end with a letter or a number. + +Once you create your Pipeline, you will receive an HTTP endpoint which you can post data to. 
You should see output as shown below: + +```sh output +πŸŒ€ Authorizing R2 bucket "[R2-BUCKET-NAME]" +πŸŒ€ Creating pipeline named "[PIPELINE-NAME]" +βœ… Successfully created pipeline [PIPELINE-NAME] with ID [PIPELINE-ID] + +You can now send data to your pipeline with: + curl "https://.pipelines.cloudflare.com/" -d '[{ ...JSON_DATA... }]' +``` + +## 3. Post data to your pipeline + +Use a curl command in your terminal to post an array of JSON objects to the endpoint you received in step 1. + +```sh +curl -H "Content-Type:application/json" \ + -d '[{"account_id":"test", "other_data": "test"},{"account_id":"test","other_data": "test2"}]' \ + +``` + +Once the Pipeline successfully accepts the data, you will receive a success message. + +Pipelines handle batching the data, so you can continue posting data to the Pipeline. Once a batch is filled up, the data will be partitioned by date, and written to your R2 bucket. + +## 4. Verify in R2 + +Go to the R2 bucket you created in step 1 via [the Cloudflare dashboard](https://dash.cloudflare.com/). You should see a prefix for today's date. Click through, and you will see a file created containing the JSON data you posted in step 3. + +## Summary + +By completing this guide, you have: + +- Created a Pipeline +- Connected the Pipeline with an R2 bucket as destination. +- Posted data to the R2 bucket via HTTP. +- Verified the output in the R2 bucket. + diff --git a/src/content/docs/pipelines/index.mdx b/src/content/docs/pipelines/index.mdx new file mode 100644 index 000000000000000..183179454a5c12a --- /dev/null +++ b/src/content/docs/pipelines/index.mdx @@ -0,0 +1,53 @@ +--- +title: Overview +type: overview +pcx_content_type: overview +sidebar: + order: 1 + badge: + text: Beta +head: + - tag: title + content: Pipelines +--- + +import { CardGrid, Description, Feature, LinkTitleCard, Plan, RelatedProduct } from "~/components"; + + + +Ingest, transform, and store real time data streams in R2. 
+ + + + + +*** +## Features + + +Pipelines generate an HTTP endpoint, which you can post data to. + + + +Convert incoming records into compressed JSON files, and write to R2. + + +*** + +## More resources + + + + +Learn about Pipelines limits. + + + +Follow @CloudflareDev on Twitter to learn about product announcements, and what is new in Cloudflare Workers. + + + +Connect with the Workers community on Discord to ask questions, show what you are building, and discuss the platform with other developers. + + + \ No newline at end of file diff --git a/src/content/docs/pipelines/observability/index.mdx b/src/content/docs/pipelines/observability/index.mdx new file mode 100644 index 000000000000000..c1576788609ddc5 --- /dev/null +++ b/src/content/docs/pipelines/observability/index.mdx @@ -0,0 +1,12 @@ +--- +title: Observability +pcx_content_type: navigation +sidebar: + order: 5 + group: + hideIndex: true +--- + +import { DirectoryListing } from "~/components" + + \ No newline at end of file diff --git a/src/content/docs/pipelines/observability/metrics.mdx b/src/content/docs/pipelines/observability/metrics.mdx new file mode 100644 index 000000000000000..e5487c0341f7ac0 --- /dev/null +++ b/src/content/docs/pipelines/observability/metrics.mdx @@ -0,0 +1,66 @@ +--- +pcx_content_type: concept +title: Metrics +sidebar: + order: 10 + +--- + +Pipelines metrics are split across three different nodes under `viewer` > `accounts`. Refer to [Explore the GraphQL schema](/analytics/graphql-api/getting-started/explore-graphql-schema/) to learn how to navigate a GraphQL schema and discover which data are available. + +To learn more about the GraphQL Analytics API, refer to [GraphQL Analytics API](/analytics/graphql-api/). + +You can use the GraphQL API to measure metrics for data ingested, as well as data delivered. + +## Write GraphQL queries + +Examples of how to explore your Pipelines metrics. 
+ +### Measure total bytes & records ingested over time period + +```graphql +query PipelineIngestion($accountTag: string!, $pipelineId: string!, $datetimeStart: Time!, $datetimeEnd: Time!) { + viewer { + accounts(filter: {accountTag: $accountTag}) { + pipelinesIngestionAdaptiveGroups( + limit: 10000 + filter: { + pipelineId: $pipelineId + datetime_geq: $datetimeStart + datetime_leq: $datetimeEnd + } + + ) + { + sum { + ingestedBytes, + ingestedRecords, + } + } + } + } +} +``` + +### Measure volume of data delivered + +```graphql +query PipelineDelivery($accountTag: string!, $queueId: string!, $datetimeStart: Time!, $datetimeEnd: Time!) { + viewer { + accounts(filter: {accountTag: $accountTag}) { + pipelinesDeliveryAdaptiveGroups( + limit: 10000 + filter: { + pipelineId: $queueId + datetime_geq: $datetimeStart + datetime_leq: $datetimeEnd + } + ) { + sum { + deliveredBytes, + } + } + } + } +} +``` \ No newline at end of file diff --git a/src/content/docs/pipelines/pipelines-api.mdx b/src/content/docs/pipelines/pipelines-api.mdx new file mode 100644 index 000000000000000..09c4cfbbe132130 --- /dev/null +++ b/src/content/docs/pipelines/pipelines-api.mdx @@ -0,0 +1,7 @@ +--- +pcx_content_type: navigation +title: Pipelines REST API +sidebar: + order: 10 + +--- \ No newline at end of file diff --git a/src/content/docs/pipelines/reference/changelog.mdx b/src/content/docs/pipelines/reference/changelog.mdx new file mode 100644 index 000000000000000..e3fcb90c64098cc --- /dev/null +++ b/src/content/docs/pipelines/reference/changelog.mdx @@ -0,0 +1,15 @@ +--- +pcx_content_type: changelog +title: Changelog +changelog_file_name: + - pipelines +sidebar: + order: 99 + +--- + +import { ProductChangelog } from "~/components" + +{/* */} + + \ No newline at end of file diff --git a/src/content/docs/pipelines/reference/index.mdx b/src/content/docs/pipelines/reference/index.mdx new file mode 100644 index 000000000000000..a6f575945f80a9d --- /dev/null +++ 
b/src/content/docs/pipelines/reference/index.mdx @@ -0,0 +1,12 @@ +--- +pcx_content_type: navigation +title: Platform +sidebar: + order: 8 + group: + hideIndex: true +--- + +import { DirectoryListing } from "~/components" + + \ No newline at end of file diff --git a/src/content/docs/pipelines/reference/limits.mdx b/src/content/docs/pipelines/reference/limits.mdx new file mode 100644 index 000000000000000..df5c97e39320442 --- /dev/null +++ b/src/content/docs/pipelines/reference/limits.mdx @@ -0,0 +1,23 @@ +--- +pcx_content_type: concept +title: Limits +sidebar: + order: 2 +--- + +import { Render } from "~/components" + +:::note + +Many of these limits will increase during Pipelines' public beta period. [Follow our changelog](/pipelines/platform/changelog/) to keep up with the changes. + +::: + + +| Feature | Limit | +| --------------------------------------------- | ------------------------------------------------------------- | +| Requests per second | 10,000 | +| Maximum payload per request | 1 MB | +| Maximum batch size | 100 MB | +| Maximum batch records | 10,000 | +| Maximum batch duration | 600s | diff --git a/src/content/docs/pipelines/reference/pricing.mdx b/src/content/docs/pipelines/reference/pricing.mdx new file mode 100644 index 000000000000000..3f2050626157260 --- /dev/null +++ b/src/content/docs/pipelines/reference/pricing.mdx @@ -0,0 +1,11 @@ +--- +pcx_content_type: concept +title: Pricing +sidebar: + order: 1 +head: + - tag: title + content: Pipelines Pricing +--- + +TODO \ No newline at end of file diff --git a/src/content/docs/pipelines/reference/wrangler-commands.mdx b/src/content/docs/pipelines/reference/wrangler-commands.mdx new file mode 100644 index 000000000000000..53a355123c785e2 --- /dev/null +++ b/src/content/docs/pipelines/reference/wrangler-commands.mdx @@ -0,0 +1,8 @@ +--- +pcx_content_type: navigation +title: Wrangler commands +external_link: /workers/wrangler/commands/#pipelines +sidebar: + order: 80 + +--- \ No newline at end of file 
diff --git a/src/content/docs/pipelines/sources/http.mdx b/src/content/docs/pipelines/sources/http.mdx new file mode 100644 index 000000000000000..dcaf24730536b66 --- /dev/null +++ b/src/content/docs/pipelines/sources/http.mdx @@ -0,0 +1,60 @@ +--- +title: HTTP +pcx_content_type: concept +sidebar: + order: 1 +head: + - tag: title + content: HTTP +--- + +import { Render, PackageManagers } from "~/components"; + +You can send data to your Pipeline via HTTP. By default, HTTP is enabled on all Pipelines. When you create a Pipeline, it will generate an HTTP endpoint that you can make POST requests to. + +```sh +$ npx wrangler pipelines create [PIPELINE-NAME] --r2 [R2-BUCKET-NAME] --access-key-id [ACCESS-KEY-ID] --secret-access-key [SECRET-ACCESS-KEY] + +πŸŒ€ Creating pipeline named "[PIPELINE-NAME]" +βœ… Successfully created pipeline [PIPELINE-NAME] with ID [PIPELINE-ID] + +You can now send data to your pipeline with: + curl "https://.pipelines.cloudflare.com/" -d '[{ ...JSON_DATA... }]' +``` + +## Turning HTTP off +By default, ingestion via HTTP is turned on for all Pipelines. You can turn it off by setting `--http false` when creating or updating a Pipeline. + +```sh +$ npx wrangler pipelines create [PIPELINE-NAME] --r2 [R2-BUCKET-NAME] --access-key-id [ACCESS-KEY-ID] --secret-access-key [SECRET-ACCESS-KEY] --http false +``` + +Ingestion URLs are tied to your Pipeline ID. Turning HTTP off, and then turning it back on, will not change the URL. + +## Authentication +You can secure your HTTP ingestion endpoint using Cloudflare API tokens. By default, authentication is turned off. To enable authentication, use `--authentication true` while creating or updating a Pipeline. + +```sh +$ npx wrangler pipelines create [PIPELINE-NAME] --r2 [R2-BUCKET-NAME] --access-key-id [ACCESS-KEY-ID] --secret-access-key [SECRET-ACCESS-KEY] --authentication true +``` + +Once authentication is turned on, you will need to include a Cloudflare API token in your request headers. 
### Get API token +1. Log in to the [Cloudflare dashboard](https://dash.cloudflare.com) and select your account. +2. Navigate to your [API Keys](https://dash.cloudflare.com/profile/api-tokens). +3. Select *Create Token*. +4. Choose the template for Workers Pipelines. Click on *continue to summary*, and finally on *create token*. Make sure to copy the API token, and save it securely. + +### Making authenticated requests +Include the API token you created in the previous step in the headers for your request: + +```sh +curl https://.pipelines.cloudflare.com \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${API_TOKEN}" \ + -d '[ + {"key1": "value1", "key2": "value2"}, + {"key1": "value3", "key2": "value4"} + ]' +``` + diff --git a/src/content/docs/pipelines/sources/index.mdx b/src/content/docs/pipelines/sources/index.mdx new file mode 100644 index 000000000000000..2ddffca7bfa595b --- /dev/null +++ b/src/content/docs/pipelines/sources/index.mdx @@ -0,0 +1,12 @@ +--- +pcx_content_type: navigation +title: Sources +sidebar: + order: 3 + group: + hideIndex: true +--- + +import { DirectoryListing } from "~/components" + + \ No newline at end of file diff --git a/src/content/docs/pipelines/sources/worker-bindings.md b/src/content/docs/pipelines/sources/worker-bindings.md new file mode 100644 index 000000000000000..35135dae7532088 --- /dev/null +++ b/src/content/docs/pipelines/sources/worker-bindings.md @@ -0,0 +1,112 @@ +--- +title: Worker Bindings +pcx_content_type: concept +sidebar: + order: 2 +head: + - tag: title + content: Worker Bindings +--- + +import { Render, PackageManagers } from "~/components"; + +You can send records to your Pipeline directly from a [Cloudflare Worker](/workers/). To do so, you need to: +1. Create a Worker +2. Create a Pipeline +3. Add your Pipeline as a binding in your Workers' `wrangler.toml` file +4. Write your Worker, to send records to your Pipeline + +## 1. 
Create a Worker +Create a Cloudflare Worker if you don't already have one. This Worker will send records to your Pipeline. + +To create a Worker, run: + + + + + +This will create a new directory, which will include both a `src/index.ts` Worker script, and a [`wrangler.toml`](/workers/wrangler/configuration/) configuration file. Navigate into the newly created directory: + +```sh +cd pipeline-worker +``` + +## 2. Create a Pipeline +Create a new Pipeline, if you don't already have one. Follow the instructions in the [get started guide](/pipelines/get-started/) if this is your first time creating a Pipeline. + +By default, Worker bindings are enabled on all Pipelines. Keep track of the name you gave your Pipeline in this stage; we'll use it in the next step. + +## 3. Add a Binding +To connect your Worker to your Pipeline, you need to create a binding. [Bindings](/workers/runtime-apis/bindings/) allow you to grant specific capabilities to your Worker. + +Open your newly generated `wrangler.toml` configuration file and add the following: + +```toml +[[pipelines]] + binding = "MY_PIPELINE" + pipeline = "" +``` + +Replace `` with the name of the Pipeline you created in step 2. Next, replace `MY_PIPELINE` with the name you want for your `binding`. The binding must be a valid JavaScript variable name. This is the variable you will use to reference this pipeline in your Worker. + +## 4. Write your Worker +You will now configure your Worker to send records to your Pipeline. Your Worker will: + +1. Take a request it receives from the browser +2. Transform the request to JSON +3. 
Send the resulting record to your Pipeline + +In your Worker project directory, open the `src` folder and add the following to your `index.ts` file: +```ts +export interface Env { + : Pipeline; +} + +export default { + async fetch(req, env, ctx): Promise { + let record = { + url: req.url, + method: req.method, + headers: Object.fromEntries(req.headers) + } + await env.MY_PIPELINE.send([record]); + return new Response('Success'); + }, +} satisfies ExportedHandler; +``` + +Replace `MY_PIPELINE` with the name of the binding you set in Step 3. If sending the record to the Pipeline fails, your Worker will return an error (raise an exception). If sending the record succeeds, it will return `Success` back with a HTTP `200` status code to the browser. + +In a production application, you would likely use a [`try...catch`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/try...catch) statement to catch the exception and handle it directly (for example, return a custom error or even retry). + +### Publish your Worker +With your `wrangler.toml` file and `index.ts` file configured, you are ready to publish your producer Worker. To publish your producer Worker, run: + +```sh +npx wrangler deploy +``` + +You should see output that resembles the below, with a `*.workers.dev` URL by default. + +``` +Uploaded (0.76 sec) +Published (0.29 sec) + https://..workers.dev +``` + +Copy your `*.workers.dev` subdomain and paste it into a new browser tab. Refresh the page a few times to send records to your Pipeline. Your browser should return the `Success` response after sending the record to your Pipeline. + +## 5. Verify in R2 +Go to the R2 bucket you created in step 2 via [the Cloudflare dashboard](https://dash.cloudflare.com/). You should see a prefix for today's date. Click through, and you'll find one or more files, containing the records you sent in step 4. 
diff --git a/src/content/docs/workers/wrangler/commands.mdx b/src/content/docs/workers/wrangler/commands.mdx index 688dbf9f236adc1..a466bd14df732ae 100644 --- a/src/content/docs/workers/wrangler/commands.mdx +++ b/src/content/docs/workers/wrangler/commands.mdx @@ -12,7 +12,7 @@ import { TabItem, Tabs, Render, Type, MetaInfo } from "~/components"; Wrangler offers a number of commands to manage your Cloudflare Workers. - [`docs`](#docs) - Open this page in your default browser. -- [`init`](#init) - Create a new project from a variety of web frameworks and templates. +- [`init`](#init) - Create a new project from a variety of web frameworks and templates. (Deprecated β€”Β use `npm create cloudflare@latest` instead) - [`generate`](#generate) - Create a Wrangler project using an existing [Workers template](https://github.com/cloudflare/worker-template). - [`d1`](#d1) - Interact with D1. - [`vectorize`](#vectorize) - Interact with Vectorize indexes. @@ -20,7 +20,7 @@ Wrangler offers a number of commands to manage your Cloudflare Workers. - [`deploy`](#deploy) - Deploy your Worker to Cloudflare. - [`dev`](#dev) - Start a local server for developing your Worker. - [`publish`](#publish) - Publish your Worker to Cloudflare. -- [`delete`](#delete-2) - Delete your Worker from Cloudflare. +- [`delete`](#delete-3) - Delete your Worker from Cloudflare. - [`kv namespace`](#kv-namespace) - Manage Workers KV namespaces. - [`kv key`](#kv-key) - Manage key-value pairs within a Workers KV namespace. - [`kv bulk`](#kv-bulk) - Manage multiple key-value pairs within a Workers KV namespace in batches. @@ -28,9 +28,9 @@ Wrangler offers a number of commands to manage your Cloudflare Workers. - [`r2 object`](#r2-object) - Manage Workers R2 objects. - [`secret`](#secret) - Manage the secret variables for a Worker. - [`secret:bulk`](#secretbulk) - Manage multiple secret variables for a Worker. -- [`workflows`](#workflows) - Manage and configure Workflows. 
- [`tail`](#tail) - Start a session to livestream logs from a deployed Worker. - [`pages`](#pages) - Configure Cloudflare Pages. +- [`pipelines`](#pipelines) - Configure Cloudflare Pipelines. - [`queues`](#queues) - Configure Workers Queues. - [`login`](#login) - Authorize Wrangler with your Cloudflare account using OAuth. - [`logout`](#logout) - Remove Wrangler’s authorization for accessing your account. @@ -44,7 +44,16 @@ Wrangler offers a number of commands to manage your Cloudflare Workers. :::note - +The following global flags work on every command, with some exceptions for `pages` commands. + +- `--help` + - Show help. +- `--version` + - Show version number. +- `--config` (not supported by Pages) + - Path to `.toml` configuration file. +- `--experimental-json-config` (not supported by Pages) + - ⚠️ This is an experimental command. Read configuration from a `wrangler.json` file, instead of `wrangler.toml`. `wrangler.json` is a [JSONC](https://code.visualstudio.com/docs/languages/json#_json-with-comments) file. ::: @@ -128,13 +137,13 @@ wrangler docs [] - `COMMAND` - The Wrangler command you want to learn more about. This opens your default browser to the section of the documentation that describes the command. - - ## `init` :::note -The `init` command will be removed in a future version. Please use `npm create cloudflare@latest` instead. + +The `init` command will be removed in a future version. Please use `npm create cloudflare\@2.5.0` instead. + ::: @@ -152,8 +161,6 @@ wrangler init [] [OPTIONS] - Fetch a Worker initialized from the dashboard. This is done by passing the flag and the Worker name. `wrangler init --from-dash `. - The `--from-dash` command will not automatically sync changes made to the dashboard after the command is used. Therefore, it is recommended that you continue using the CLI. 
- - --- ## `generate` @@ -175,8 +182,6 @@ wrangler generate [] [TEMPLATE] - `TEMPLATE` - The URL of a GitHub template, with a default [worker-template](https://github.com/cloudflare/worker-template). Browse a list of available templates on the [cloudflare/workers-sdk](https://github.com/cloudflare/workers-sdk/tree/main/templates#usage) repository. - - --- ## `d1` @@ -197,8 +202,6 @@ wrangler d1 create [OPTIONS] - Provide an optional [location hint](/d1/configuration/data-location/) for your database leader. - Available options include `weur` (Western Europe), `eeur` (Eastern Europe), `apac` (Asia Pacific), `oc` (Oceania), `wnam` (Western North America), and `enam` (Eastern North America). - - ### `info` Get information about a D1 database, including the current database size and state. @@ -212,8 +215,6 @@ wrangler d1 info [OPTIONS] - `--json` - Return output as JSON rather than a table. - - ### `list` List all D1 databases in your account. @@ -225,8 +226,6 @@ wrangler d1 list [OPTIONS] - `--json` - Return output as JSON rather than a table. - - ### `delete` Delete a D1 database. @@ -240,8 +239,6 @@ wrangler d1 delete [OPTIONS] - `-y, --skip-confirmation` - Skip deletion confirmation prompt. - - ### `execute` Execute a query on a D1 database. @@ -277,8 +274,6 @@ You must provide either `--command` or `--file` for this command to run successf - `--batch-size` - Number of queries to send in a single batch. - - ### `export` Export a D1 database or table's schema and/or content to a `.sql` file. @@ -300,8 +295,6 @@ wrangler d1 export [OPTIONS] - `--no-schema` - Controls whether export SQL file contains database schema. Note that `--no-schema=true` is not recommended due to a known wrangler limitation that intreprets the value as false. - - ### `time-travel restore` Restore a database to a specific point-in-time using [Time Travel](/d1/reference/time-travel/). 
@@ -319,8 +312,6 @@ wrangler d1 time-travel restore [OPTIONS] - `--json` - Return output as JSON rather than a table. - - ### `time-travel info` Inspect the current state of a database for a specific point-in-time using [Time Travel](/d1/reference/time-travel/). @@ -336,8 +327,6 @@ wrangler d1 time-travel info [OPTIONS] - `--json` b - Return output as JSON rather than a table. - - ### `backup create` :::caution @@ -356,8 +345,6 @@ wrangler d1 backup create - `DATABASE_NAME` - The name of the D1 database to backup. - - ### `backup list` :::caution @@ -376,8 +363,6 @@ wrangler d1 backup list - `DATABASE_NAME` - The name of the D1 database to list the backups of. - - ### `backup restore` :::caution @@ -398,8 +383,6 @@ wrangler d1 backup restore - `BACKUP_ID` - The ID of the backup you wish to restore. - - ### `backup download` :::caution @@ -422,8 +405,6 @@ wrangler d1 backup download - `--output` - The `.sqlite3` file to write to (defaults to `'..sqlite3'`). - - ### `migrations create` Create a new migration. @@ -443,8 +424,6 @@ wrangler d1 migrations create - `MIGRATION_NAME` - A descriptive name for the migration you wish to create. - - ### `migrations list` View a list of unapplied migration files. @@ -464,8 +443,6 @@ wrangler d1 migrations list [OPTIONS] - `--preview` - Show the list of unapplied migration files on your preview D1 database (as defined by `preview_database_id` in [`wrangler.toml`](/workers/wrangler/configuration/#d1-databases)). - - ### `migrations apply` Apply any unapplied migrations. @@ -484,8 +461,6 @@ wrangler d1 migrations apply [OPTIONS] - `DATABASE_NAME` - The name of the D1 database you wish to apply your migrations on. -- `--env` - - Specify which environment configuration to use for D1 binding - `--local` - Execute any unapplied migrations on your locally persisted D1 database. - `--remote` @@ -497,8 +472,6 @@ wrangler d1 migrations apply [OPTIONS] - `--batch-size` - Number of queries to send in a single batch. 
- - --- ## `hyperdrive` @@ -532,8 +505,6 @@ npx wrangler vectorize create [--dimensions=] [--me - `--deprecated-v1` - Create a legacy Vectorize index. Please note that legacy Vectorize indexes are on a [deprecation path](/vectorize/reference/transition-vectorize-legacy). - - ### `list` List all Vectorize indexes in your account, including the configured dimensions and distance metric. @@ -545,8 +516,6 @@ npx wrangler vectorize list - `--deprecated-v1` - List legacy Vectorize indexes. Please note that legacy Vectorize indexes are on a [deprecation path](/vectorize/reference/transition-vectorize-legacy). - - ### `get` Get details about an individual index, including its configuration. @@ -560,8 +529,6 @@ npx wrangler vectorize get - `--deprecated-v1` - Get a legacy Vectorize index. Please note that legacy Vectorize indexes are on a [deprecation path](/vectorize/reference/transition-vectorize-legacy). - - ### `info` Get some additional information about an individual index, including the vector count and details about the last processed mutation. @@ -573,8 +540,6 @@ npx wrangler vectorize info - `INDEX_NAME` - The name of the index to fetch details for. - - ### `delete` Delete a Vectorize index. @@ -590,8 +555,6 @@ npx wrangler vectorize delete [OPTIONS] - `--deprecated-v1` - Delete a legacy Vectorize index. Please note that legacy Vectorize indexes are on a [deprecation path](/vectorize/reference/transition-vectorize-legacy). - - ### `insert` Insert vectors into an index. @@ -609,8 +572,6 @@ npx wrangler vectorize insert [OPTIONS] - `--deprecated-v1` - Insert into a legacy Vectorize index. Please note that legacy Vectorize indexes are on a [deprecation path](/vectorize/reference/transition-vectorize-legacy). - - ### `upsert` Upsert vectors into an index. Existing vectors in the index would be overwritten. @@ -626,8 +587,6 @@ npx wrangler vectorize upsert [OPTIONS] - `--batch-size` - The number of vectors to insert at a time (default: `5000`). 
- - ### `query` Query a Vectorize index for similar vectors. @@ -638,10 +597,8 @@ npx wrangler vectorize query [OPTIONS] - `INDEX_NAME` - The name of the Vectorize index to query. -- `--vector` - - Vector against which the Vectorize index is queried. Either this or the `vector-id` param must be provided. -- `--vector-id` - - Identifier for a vector that is already present in the index against which the index is queried. Either this or the `vector` param must be provided. +- `--vector` + - Vector against which the Vectorize index is queried. - `--top-k` - The number of vectors to query (default: `5`). - `--return-values` @@ -653,8 +610,6 @@ npx wrangler vectorize query [OPTIONS] - `--filter` - Filter vectors based on this metadata filter. Example: `'{ 'p1': 'abc', 'p2': { '$ne': true }, 'p3': 10, 'p4': false, 'nested.p5': 'abcd' }'` - - ### `get-vectors` Fetch vectors from a Vectorize index using the provided ids. @@ -668,8 +623,6 @@ npx wrangler vectorize get-vectors [OPTIONS] - `--ids` - List of ids for which vectors must be fetched. - - ### `delete-vectors` Delete vectors in a Vectorize index using the provided ids. @@ -683,8 +636,6 @@ npx wrangler vectorize delete-vectors [OPTIONS] - `--ids` - List of ids corresponding to the vectors that must be deleted. - - ### `create-metadata-index` Enable metadata filtering on the specified property. @@ -700,8 +651,6 @@ npx wrangler vectorize create-metadata-index [OPTIONS] - `--type` - Data type of the property. Must be one of `string`, `number`, or `boolean`. - - ### `list-metadata-index` List metadata properties on which metadata filtering is enabled. @@ -713,8 +662,6 @@ npx wrangler vectorize list-metadata-index [OPTIONS] - `INDEX_NAME` - The name of the Vectorize index for which metadata indexes needs to be fetched. - - ### `delete-metadata-index` Disable metadata filtering on the specified property. 
@@ -728,8 +675,6 @@ npx wrangler vectorize delete-metadata-index [OPTIONS] - `--property-name` - Metadata property for which metadata filtering should be disabled. - - --- ## `dev` @@ -827,8 +772,6 @@ As of Wrangler v3.2.0, `wrangler dev` is supported by any Linux distributions pr - `--alias` `Array` - Specify modules to alias using [module aliasing](/workers/wrangler/configuration/#module-aliasing). - - `wrangler dev` is a way to [locally test](/workers/testing/local-development/) your Worker while developing. With `wrangler dev` running, send HTTP requests to `localhost:8787` and your Worker should execute as expected. You will also see `console.log` messages and exceptions appearing in your terminal. --- @@ -903,8 +846,6 @@ None of the options for this command are required. Also, many can be set in your - `--dispatch-namespace` - Specify the [Workers for Platforms dispatch namespace](/cloudflare-for-platforms/workers-for-platforms/get-started/configuration/#2-create-a-dispatch-namespace) to upload this Worker to. - - --- ## `publish` @@ -940,8 +881,6 @@ wrangler delete [ + + + +
+

Our Products

+
+ + + +
+
+ + + + + + +``` + + +The above code does the following: + +- Uses Tailwind CSS to style the page. +- Renders a list of products. +- Adds a button to view the details of a product. +- Adds a button to add a product to the cart. +- Contains a `handleClick` function to handle the click events. This function logs the action and the product ID. In the next steps, you will add the logic to send the click events to your pipeline. + +## 3. Generate clickstream data + +You need to send clickstream data like the `timestamp`, `user_id`, `session_id`, and `device_info` to your pipeline. You can generate this data on the client side. Add the following function in the ` + + + +
+

Our Products

+
+ + + +
+
+ + + + + + +``` + + +The above code does the following: + +- Uses Tailwind CSS to style the page. +- Renders a list of products. +- Adds a button to view the details of a product. +- Adds a button to add a product to the cart. +- Contains a `handleClick` function to handle the click events. This function logs the action and the product ID. In the next steps, you will create a pipeline and add the logic to send the click events to this pipeline. + +## 3. Create a pipeline + +You need to create a new pipeline and connect it to your R2 bucket. + +Create a new pipeline `clickstream-pipeline-client` using the [Wrangler CLI](/workers/wrangler/): + +```sh +npx wrangler pipelines create clickstream-pipeline-client --r2-bucket --compression none +``` + +Replace `` with the name of your R2 bucket. + +When you run the command, you will be prompted to authorize Cloudflare Workers Pipelines to create R2 API tokens on your behalf. These tokens are required by your Pipeline. Your Pipeline uses these tokens when loading data into your bucket. You can approve the request through the browser link which will open automatically. + +:::note +In the above command, you create a pipeline with the `--compression none` flag. This flag will prevent the pipeline from compressing the data before storing it in the R2 bucket. This is useful for testing purposes. In a production environment, you should use compression. We recommend keeping the default settings. +::: + +```output +πŸŒ€ Authorizing R2 bucket "" +Opening a link in your default browser: https://oauth.pipelines.cloudflare.com/oauth/login?accountId=&bucketName=&pipelineName=clickstream-pipeline-client +πŸŒ€ Checking access to R2 bucket "" +πŸŒ€ Creating Pipeline named "clickstream-pipeline-client" +βœ… Successfully created Pipeline "clickstream-pipeline-client" with id +πŸŽ‰ You can now send data to your Pipeline! 
+ +To start interacting with this Pipeline from a Worker, open your Worker’s config file and add the following binding configuration: + +{ + "pipelines": [ + { + "pipeline": "clickstream-pipeline-client", + "binding": "PIPELINE" + } + ] +} + +Send data to your Pipeline's HTTP endpoint: + + curl "https://.pipelines.cloudflare.com" -d '[{"foo": "bar"}]' +``` + +Make a note of the URL of the pipeline. You will use this URL to send the clickstream data from the client-side. + +## 4. Generate clickstream data + +You need to send clickstream data like the `timestamp`, `user_id`, `session_id`, and `device_info` to your pipeline. You can generate this data on the client side. Add the following function in the `