diff --git a/_data/harnesses/anomaly-detection/ksql.yml b/_data/harnesses/anomaly-detection/ksql.yml index e5c80cde17..5e67b7dfcd 100644 --- a/_data/harnesses/anomaly-detection/ksql.yml +++ b/_data/harnesses/anomaly-detection/ksql.yml @@ -128,7 +128,7 @@ dev: - title: Write your statements to a file content: - action: make_file - file: src/test-statements.sql + file: src/statements.sql render: file: tutorials/anomaly-detection/ksql/markup/dev/make-src-file.adoc @@ -136,6 +136,11 @@ test: steps: - title: Create the test data content: + - action: make_file + file: test/input.json + render: + file: tutorials/anomaly-detection/ksql/markup/test/make-test-input.adoc + - action: make_file file: test/output.json render: diff --git a/_data/tutorials.yaml b/_data/tutorials.yaml index 016f4c50e6..f7d28b4e29 100644 --- a/_data/tutorials.yaml +++ b/_data/tutorials.yaml @@ -551,7 +551,7 @@ anomaly-detection: meta-description: "detect anomalies in a stream of Kafka events" slug: "/anomaly-detection" question: "If you have time series events in a Kafka topic, how can you find anomalous events?" - introduction: "A common technique of fraudsters is to disguise transactions under the name of a popular company, the idea being that the chances of them being recognized is very low. For example, transactions labeled Verizon, Citibank, or USPS are likely to look similar and blend in with legitimate transactions. This tutorial shows you how to identify this pattern of behavior by detecting 'abnormal' transactions that occur within a window of time. + introduction: "A common technique of fraudsters is to disguise transactions under the name of a popular company, the idea being that the chances of them being recognized is very low. For example, transactions labeled Verizon, Citibank, or USPS are likely to look similar and blend in with legitimate transactions. 
This tutorial shows you how to identify this pattern of behavior by detecting abnormal transactions that occur within a window of time. Normally, a group of these transactions will occur within a 24 hour period. In fraud detection, financial institutions will categorize this behavior as unusual and alert their fraud team to investigate immediately. Other example use cases include detecting ATM fraud or unusual credit card activity." status: diff --git a/_includes/tutorials/anomaly-detection/ksql/code/Makefile b/_includes/tutorials/anomaly-detection/ksql/code/Makefile index 24fdff4312..a78ebb9724 100644 --- a/_includes/tutorials/anomaly-detection/ksql/code/Makefile +++ b/_includes/tutorials/anomaly-detection/ksql/code/Makefile @@ -13,5 +13,6 @@ tutorial: harness-runner ../../../../../_data/harnesses/anomaly-detection/ksql.yml $(TEMP_DIR) $(SEQUENCE) diff --strip-trailing-cr $(STEPS_DIR)/dev/find-suspicious-transactions-output.log $(DEV_OUTPUTS_DIR)/find-suspicious-transactions/output-0.log diff --strip-trailing-cr $(STEPS_DIR)/dev/print-accounts-to-monitor.log $(DEV_OUTPUTS_DIR)/print-accounts-to-monitor/output-0.log + bash -c "diff --strip-trailing-cr <(cut -d ',' -f 2 $(STEPS_DIR)/dev/print-accounts-to-monitor.log) <(cut -d ',' -f 2- $(DEV_OUTPUTS_DIR)/print-accounts-to-monitor/output-0.log)" diff --strip-trailing-cr $(STEPS_DIR)/test/expected-results.log $(TEST_OUTPUTS_DIR)/test-results.log reset diff --git a/_includes/tutorials/anomaly-detection/ksql/code/docker-compose.yml b/_includes/tutorials/anomaly-detection/ksql/code/docker-compose.yml index 8ef08ea312..ab822c2390 100644 --- a/_includes/tutorials/anomaly-detection/ksql/code/docker-compose.yml +++ b/_includes/tutorials/anomaly-detection/ksql/code/docker-compose.yml @@ -29,6 +29,7 @@ services: KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 + KAFKA_LOG_RETENTION_MS: -1 schema-registry: image: 
confluentinc/cp-schema-registry:7.1.0 @@ -43,9 +44,7 @@ services: SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'broker:9092' ksqldb-server: - # In ksqlDB 0.14+, ksql-test-runner can experience a flaky timing issue with joins. - # This test case is temporarily left at an earlier version. - image: confluentinc/ksqldb-server:0.13.0 + image: confluentinc/ksqldb-server:0.24.0 hostname: ksqldb-server container_name: ksqldb-server depends_on: @@ -64,7 +63,7 @@ services: KSQL_KSQL_STREAMS_AUTO_OFFSET_RESET: "earliest" ksqldb-cli: - image: confluentinc/ksqldb-cli:0.13.0 + image: confluentinc/ksqldb-cli:0.24.0 container_name: ksqldb-cli depends_on: - broker diff --git a/_includes/tutorials/anomaly-detection/ksql/code/src/statements.sql b/_includes/tutorials/anomaly-detection/ksql/code/src/statements.sql index c6e61f12a8..2115bce5a9 100644 --- a/_includes/tutorials/anomaly-detection/ksql/code/src/statements.sql +++ b/_includes/tutorials/anomaly-detection/ksql/code/src/statements.sql @@ -1,22 +1,22 @@ -CREATE TABLE suspicious_names (CREATED_DATE VARCHAR, +CREATE TABLE suspicious_names (CREATED_TS VARCHAR, COMPANY_NAME VARCHAR PRIMARY KEY, COMPANY_ID INT) WITH (kafka_topic='suspicious_names', partitions=1, value_format='JSON', - timestamp='CREATED_DATE', - timestamp_format='yyyy-MM-dd HH:mm:ss'); + timestamp='CREATED_TS', + timestamp_format = 'yyyy-MM-dd HH:mm:ss'); -CREATE STREAM transactions (TXN_ID BIGINT, USERNAME VARCHAR, RECIPIENT VARCHAR, AMOUNT DOUBLE, TIMESTAMP VARCHAR) +CREATE STREAM transactions (TXN_ID BIGINT, USERNAME VARCHAR, RECIPIENT VARCHAR, AMOUNT DOUBLE, TS VARCHAR) WITH (kafka_topic='transactions', partitions=1, value_format='JSON', - timestamp='TIMESTAMP', - timestamp_format='yyyy-MM-dd HH:mm:ss'); + timestamp='TS', + timestamp_format = 'yyyy-MM-dd HH:mm:ss'); CREATE STREAM suspicious_transactions WITH (kafka_topic='suspicious_transactions', partitions=1, value_format='JSON') AS - SELECT T.TXN_ID, T.USERNAME, T.RECIPIENT, T.AMOUNT, T.TIMESTAMP + SELECT 
T.TXN_ID, T.USERNAME, T.RECIPIENT, T.AMOUNT, T.TS FROM transactions T INNER JOIN suspicious_names S @@ -24,9 +24,9 @@ CREATE STREAM suspicious_transactions CREATE TABLE accounts_to_monitor WITH (kafka_topic='accounts_to_monitor', partitions=1, value_format='JSON') AS - SELECT TIMESTAMPTOSTRING(WINDOWSTART, 'yyyy-MM-dd HH:mm:ss Z') AS WINDOW_START, - TIMESTAMPTOSTRING(WINDOWEND, 'yyyy-MM-dd HH:mm:ss Z') AS WINDOW_END, - USERNAME + SELECT USERNAME, + TIMESTAMPTOSTRING(WINDOWSTART, 'yyyy-MM-dd HH:mm:ss Z') AS WINDOW_START, + TIMESTAMPTOSTRING(WINDOWEND, 'yyyy-MM-dd HH:mm:ss Z') AS WINDOW_END FROM suspicious_transactions WINDOW TUMBLING (SIZE 24 HOURS) GROUP BY USERNAME diff --git a/_includes/tutorials/anomaly-detection/ksql/code/src/test-statements.sql b/_includes/tutorials/anomaly-detection/ksql/code/src/test-statements.sql deleted file mode 100644 index 8e6aa867d4..0000000000 --- a/_includes/tutorials/anomaly-detection/ksql/code/src/test-statements.sql +++ /dev/null @@ -1,49 +0,0 @@ -CREATE TABLE suspicious_names (CREATED_DATE VARCHAR, - COMPANY_NAME VARCHAR PRIMARY KEY, - COMPANY_ID INT) - WITH (kafka_topic='suspicious_names', - partitions=1, - value_format='JSON', - timestamp='CREATED_DATE', - timestamp_format='yyyy-MM-dd HH:mm:ss'); - -INSERT INTO suspicious_names (CREATED_DATE, COMPANY_NAME, COMPANY_ID) VALUES ('2019-03-08 00:00:00', 'Verizon', 1); -INSERT INTO suspicious_names (CREATED_DATE, COMPANY_NAME, COMPANY_ID) VALUES ('2019-10-31 00:00:00', 'Spirit Halloween', 2); -INSERT INTO suspicious_names (CREATED_DATE, COMPANY_NAME, COMPANY_ID) VALUES ('2019-12-15 00:00:00', 'Best Buy', 3); - -CREATE STREAM transactions (TXN_ID BIGINT, USERNAME VARCHAR, RECIPIENT VARCHAR, AMOUNT DOUBLE, TIMESTAMP VARCHAR) - WITH (kafka_topic='transactions', - partitions=1, - value_format='JSON', - timestamp='TIMESTAMP', - timestamp_format='yyyy-MM-dd HH:mm:ss'); - -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (9900, 'Abby Normal', 'Verizon', 
22.0, '2020-10-20 13:05:36'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (12, 'Victor von Frankenstein', 'Tattered Cover', 7.0, '2020-10-20 13:07:59'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (13, 'Frau Blücher', 'Peebles', 70.0, '2020-10-20 13:15:00'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (9903, 'Abby Normal', 'Verizon', 61.0, '2020-10-20 13:31:02'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (9901, 'Abby Normal', 'Spirit Halloween', 83.0, '2020-10-20 13:44:41'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (9902, 'Abby Normal', 'Spirit Halloween', 46.0, '2020-10-20 13:44:43'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (9904, 'Abby Normal', 'Spirit Halloween', 59.0, '2020-10-20 13:44:44'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (6, 'Victor von Frankenstein', 'Confluent Cloud', 21.0, '2020-10-20 13:47:51'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (18, 'Frau Blücher', 'Target', 70.0, '2020-10-20 13:52:01'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (7, 'Victor von Frankenstein', 'Verizon', 100.0, '2020-10-20 13:55:06'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (19, 'Frau Blücher', 'Goodwill', 7.0, '2020-10-20 14:12:32'); - -CREATE STREAM suspicious_transactions - WITH (kafka_topic='suspicious_transactions', partitions=1, value_format='JSON') AS - SELECT T.TXN_ID, T.USERNAME, T.RECIPIENT, T.AMOUNT, T.TIMESTAMP - FROM transactions T - INNER JOIN - suspicious_names S - ON T.RECIPIENT = S.COMPANY_NAME; - -CREATE TABLE accounts_to_monitor - WITH (kafka_topic='accounts_to_monitor', partitions=1, value_format='JSON') AS - SELECT TIMESTAMPTOSTRING(WINDOWSTART, 'yyyy-MM-dd 
HH:mm:ss Z') AS WINDOW_START, - TIMESTAMPTOSTRING(WINDOWEND, 'yyyy-MM-dd HH:mm:ss Z') AS WINDOW_END, - USERNAME - FROM suspicious_transactions - WINDOW TUMBLING (SIZE 24 HOURS) - GROUP BY USERNAME - HAVING COUNT(*) > 3; \ No newline at end of file diff --git a/_includes/tutorials/anomaly-detection/ksql/code/test/input.json b/_includes/tutorials/anomaly-detection/ksql/code/test/input.json new file mode 100644 index 0000000000..9edc20423e --- /dev/null +++ b/_includes/tutorials/anomaly-detection/ksql/code/test/input.json @@ -0,0 +1,18 @@ +{ + "inputs": [ + { "topic": "suspicious_names", "key": "Verizon", "value": { "CREATED_TS": "2019-03-08 00:00:00", "COMPANY_NAME": "Verizon", "COMPANY_ID": 1 } }, + { "topic": "suspicious_names", "key": "Spirit Halloween", "value": { "CREATED_TS": "2019-10-31 00:00:00", "COMPANY_NAME": "Spirit Halloween", "COMPANY_ID": 2 } }, + { "topic": "suspicious_names", "key": "Best Buy", "value": { "CREATED_TS": "2019-12-15 00:00:00", "COMPANY_NAME": "Best Buy", "COMPANY_ID": 3 } }, + { "topic": "transactions", "value": { "TXN_ID": 9900, "USERNAME": "Abby Normal", "RECIPIENT": "Verizon", "AMOUNT": 22.0, "TS": "2020-10-20 13:05:36" } }, + { "topic": "transactions", "value": { "TXN_ID": 12, "USERNAME": "Victor von Frankenstein", "RECIPIENT": "Tattered Cover", "AMOUNT": 7.0, "TS": "2020-10-20 13:07:59" } }, + { "topic": "transactions", "value": { "TXN_ID": 13, "USERNAME": "Frau Blücher", "RECIPIENT": "Peebles", "AMOUNT": 70.0, "TS": "2020-10-20 13:15:00" } }, + { "topic": "transactions", "value": { "TXN_ID": 9903, "USERNAME": "Abby Normal", "RECIPIENT": "Verizon", "AMOUNT": 61.0, "TS": "2020-10-20 13:31:02" } }, + { "topic": "transactions", "value": { "TXN_ID": 9901, "USERNAME": "Abby Normal", "RECIPIENT": "Spirit Halloween", "AMOUNT": 83.0, "TS": "2020-10-20 13:44:41" } }, + { "topic": "transactions", "value": { "TXN_ID": 9902, "USERNAME": "Abby Normal", "RECIPIENT": "Spirit Halloween", "AMOUNT": 46.0, "TS": "2020-10-20 13:44:43" } }, + { 
"topic": "transactions", "value": { "TXN_ID": 9904, "USERNAME": "Abby Normal", "RECIPIENT": "Spirit Halloween", "AMOUNT": 59.0, "TS": "2020-10-20 13:44:44" } }, + { "topic": "transactions", "value": { "TXN_ID": 6, "USERNAME": "Victor von Frankenstein", "RECIPIENT": "Confluent Cloud", "AMOUNT": 21.0, "TS": "2020-10-20 13:47:51" } }, + { "topic": "transactions", "value": { "TXN_ID": 18, "USERNAME": "Frau Blücher", "RECIPIENT": "Target", "AMOUNT": 70.0, "TS": "2020-10-20 13:52:01" } }, + { "topic": "transactions", "value": { "TXN_ID": 7, "USERNAME": "Victor von Frankenstein", "RECIPIENT": "Verizon", "AMOUNT": 100.0, "TS": "2020-10-20 13:55:06" } }, + { "topic": "transactions", "value": { "TXN_ID": 19, "USERNAME": "Frau Blücher", "RECIPIENT": "Goodwill", "AMOUNT": 7.0, "TS": "2020-10-20 14:12:32" } } + ] +} diff --git a/_includes/tutorials/anomaly-detection/ksql/code/test/output.json b/_includes/tutorials/anomaly-detection/ksql/code/test/output.json index a1adc128ec..80b546fbc7 100644 --- a/_includes/tutorials/anomaly-detection/ksql/code/test/output.json +++ b/_includes/tutorials/anomaly-detection/ksql/code/test/output.json @@ -7,7 +7,7 @@ "TXN_ID": 9900, "USERNAME": "Abby Normal", "AMOUNT": 22.0, - "TIMESTAMP": "2020-10-20 13:05:36" + "TS": "2020-10-20 13:05:36" } }, { @@ -17,7 +17,7 @@ "TXN_ID": 9903, "USERNAME": "Abby Normal", "AMOUNT": 61.0, - "TIMESTAMP": "2020-10-20 13:31:02" + "TS": "2020-10-20 13:31:02" } }, { @@ -27,7 +27,7 @@ "TXN_ID": 9901, "USERNAME": "Abby Normal", "AMOUNT": 83.0, - "TIMESTAMP": "2020-10-20 13:44:41" + "TS": "2020-10-20 13:44:41" } }, { @@ -37,7 +37,7 @@ "TXN_ID": 9902, "USERNAME": "Abby Normal", "AMOUNT": 46.0, - "TIMESTAMP": "2020-10-20 13:44:43" + "TS": "2020-10-20 13:44:43" } }, { @@ -47,7 +47,7 @@ "TXN_ID": 9904, "USERNAME": "Abby Normal", "AMOUNT": 59.0, - "TIMESTAMP": "2020-10-20 13:44:44" + "TS": "2020-10-20 13:44:44" } }, { @@ -57,42 +57,9 @@ "TXN_ID": 7, "USERNAME": "Victor von Frankenstein", "AMOUNT": 100.0, - "TIMESTAMP": 
"2020-10-20 13:55:06" + "TS": "2020-10-20 13:55:06" } }, - { - "topic": "accounts_to_monitor", - "key": "Abby Normal", - "window": { - "start": 1603152000000, - "end": 1603238400000, - "type": "time" - }, - "value": null, - "timestamp": 1603199136000 - }, - { - "topic": "accounts_to_monitor", - "key": "Abby Normal", - "window": { - "start": 1603152000000, - "end": 1603238400000, - "type": "time" - }, - "value": null, - "timestamp": 1603200662000 - }, - { - "topic": "accounts_to_monitor", - "key": "Abby Normal", - "window": { - "start": 1603152000000, - "end": 1603238400000, - "type": "time" - }, - "value": null, - "timestamp": 1603201481000 - }, { "topic": "accounts_to_monitor", "key": "Abby Normal", @@ -120,17 +87,6 @@ "WINDOW_END": "2020-10-21 00:00:00 +0000" }, "timestamp": 1603201484000 - }, - { - "topic": "accounts_to_monitor", - "key": "Victor von Frankenstein", - "window": { - "start": 1603152000000, - "end": 1603238400000, - "type": "time" - }, - "value": null, - "timestamp": 1603202106000 } ] diff --git a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-accounts-to-monitor-table.sql b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-accounts-to-monitor-table.sql index 982cddc25d..d4561c776a 100644 --- a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-accounts-to-monitor-table.sql +++ b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-accounts-to-monitor-table.sql @@ -1,8 +1,8 @@ CREATE TABLE accounts_to_monitor WITH (kafka_topic='accounts_to_monitor', partitions=1, value_format='JSON') AS - SELECT TIMESTAMPTOSTRING(WINDOWSTART, 'yyyy-MM-dd HH:mm:ss Z') AS WINDOW_START, - TIMESTAMPTOSTRING(WINDOWEND, 'yyyy-MM-dd HH:mm:ss Z') AS WINDOW_END, - USERNAME + SELECT USERNAME, + TIMESTAMPTOSTRING(WINDOWSTART, 'yyyy-MM-dd HH:mm:ss Z') AS WINDOW_START, + TIMESTAMPTOSTRING(WINDOWEND, 'yyyy-MM-dd HH:mm:ss Z') AS WINDOW_END FROM suspicious_transactions WINDOW TUMBLING 
(SIZE 24 HOURS) GROUP BY USERNAME diff --git a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-suspicious-names-table.sql b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-suspicious-names-table.sql index 374a78297d..a81f3deb1c 100644 --- a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-suspicious-names-table.sql +++ b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-suspicious-names-table.sql @@ -1,8 +1,8 @@ -CREATE TABLE suspicious_names (CREATED_DATE VARCHAR, +CREATE TABLE suspicious_names (CREATED_TS VARCHAR, COMPANY_NAME VARCHAR PRIMARY KEY, COMPANY_ID INT) WITH (kafka_topic='suspicious_names', partitions=1, value_format='JSON', - timestamp='CREATED_DATE', - timestamp_format='yyyy-MM-dd HH:mm:ss'); + timestamp='CREATED_TS', + timestamp_format = 'yyyy-MM-dd HH:mm:ss'); diff --git a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-suspicious-transactions-stream.sql b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-suspicious-transactions-stream.sql index cd9d0494b7..1337ed6b73 100644 --- a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-suspicious-transactions-stream.sql +++ b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-suspicious-transactions-stream.sql @@ -1,6 +1,6 @@ CREATE STREAM suspicious_transactions WITH (kafka_topic='suspicious_transactions', partitions=1, value_format='JSON') AS - SELECT T.TXN_ID, T.USERNAME, T.RECIPIENT, T.AMOUNT, T.TIMESTAMP + SELECT T.TXN_ID, T.USERNAME, T.RECIPIENT, T.AMOUNT, T.TS FROM transactions T INNER JOIN suspicious_names S diff --git a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-transactions-stream.sql b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-transactions-stream.sql index a9a13df994..56c87505f8 100644 --- 
a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-transactions-stream.sql +++ b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-transactions-stream.sql @@ -1,6 +1,6 @@ -CREATE STREAM transactions (TXN_ID BIGINT, USERNAME VARCHAR, RECIPIENT VARCHAR, AMOUNT DOUBLE, TIMESTAMP VARCHAR) +CREATE STREAM transactions (TXN_ID BIGINT, USERNAME VARCHAR, RECIPIENT VARCHAR, AMOUNT DOUBLE, TS VARCHAR) WITH (kafka_topic='transactions', partitions=1, value_format='JSON', - timestamp='TIMESTAMP', - timestamp_format='yyyy-MM-dd HH:mm:ss'); + timestamp='TS', + timestamp_format = 'yyyy-MM-dd HH:mm:ss'); diff --git a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/populate-suspicious-names.sql b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/populate-suspicious-names.sql index 3d47791c0a..09be958f4e 100644 --- a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/populate-suspicious-names.sql +++ b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/populate-suspicious-names.sql @@ -1,3 +1,3 @@ -INSERT INTO suspicious_names (CREATED_DATE, COMPANY_NAME, COMPANY_ID) VALUES ('2019-03-08 00:00:00', 'Verizon', 1); -INSERT INTO suspicious_names (CREATED_DATE, COMPANY_NAME, COMPANY_ID) VALUES ('2019-10-31 00:00:00', 'Spirit Halloween', 2); -INSERT INTO suspicious_names (CREATED_DATE, COMPANY_NAME, COMPANY_ID) VALUES ('2019-12-15 00:00:00', 'Best Buy', 3); +INSERT INTO suspicious_names (CREATED_TS, COMPANY_NAME, COMPANY_ID) VALUES (FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (5 * 24 * 60 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss'), 'Verizon', 1); +INSERT INTO suspicious_names (CREATED_TS, COMPANY_NAME, COMPANY_ID) VALUES (FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (4 * 24 * 60 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss'), 'Spirit Halloween', 2); +INSERT INTO suspicious_names (CREATED_TS, COMPANY_NAME, COMPANY_ID) VALUES (FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (3 * 
24 * 60 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss'), 'Best Buy', 3); diff --git a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/populate-transactions.sql b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/populate-transactions.sql index 76b4ffe69a..f4eaf73fc3 100644 --- a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/populate-transactions.sql +++ b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/populate-transactions.sql @@ -1,11 +1,11 @@ -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (9900, 'Abby Normal', 'Verizon', 22.0, '2020-10-20 13:05:36'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (12, 'Victor von Frankenstein', 'Tattered Cover', 7.0, '2020-10-20 13:07:59'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (13, 'Frau Blücher', 'Peebles', 70.0, '2020-10-20 13:15:00'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (9903, 'Abby Normal', 'Verizon', 61.0, '2020-10-20 13:31:02'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (9901, 'Abby Normal', 'Spirit Halloween', 83.0, '2020-10-20 13:44:41'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (9902, 'Abby Normal', 'Spirit Halloween', 46.0, '2020-10-20 13:44:43'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (9904, 'Abby Normal', 'Spirit Halloween', 59.0, '2020-10-20 13:44:44'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (6, 'Victor von Frankenstein', 'Confluent Cloud', 21.0, '2020-10-20 13:47:51'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (18, 'Frau Blücher', 'Target', 70.0, '2020-10-20 13:52:01'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (7, 'Victor von Frankenstein', 
'Verizon', 100.0, '2020-10-20 13:55:06'); -INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TIMESTAMP) VALUES (19, 'Frau Blücher', 'Goodwill', 7.0, '2020-10-20 14:12:32'); +INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TS) VALUES (9900, 'Abby Normal', 'Verizon', 22.0, FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (1 * 24 * 60 * 60 * 1000 + 2 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss')); +INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TS) VALUES (12, 'Victor von Frankenstein', 'Tattered Cover', 7.0, FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (1 * 24 * 60 * 60 * 1000 + 3 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss')); +INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TS) VALUES (13, 'Frau Blücher', 'Peebles', 70.0, FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (1 * 24 * 60 * 60 * 1000 + 4 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss')); +INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TS) VALUES (9903, 'Abby Normal', 'Verizon', 61.0, FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (1 * 24 * 60 * 60 * 1000 + 5 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss')); +INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TS) VALUES (9901, 'Abby Normal', 'Spirit Halloween', 83.0, FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (1 * 24 * 60 * 60 * 1000 + 6 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss')); +INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TS) VALUES (9902, 'Abby Normal', 'Spirit Halloween', 46.0, FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (1 * 24 * 60 * 60 * 1000 + 7 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss')); +INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TS) VALUES (9904, 'Abby Normal', 'Spirit Halloween', 59.0, FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (1 * 24 * 60 * 60 * 1000 + 8 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss')); +INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TS) VALUES (6, 'Victor von Frankenstein', 'Confluent Cloud', 21.0, 
FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (1 * 24 * 60 * 60 * 1000 + 9 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss')); +INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TS) VALUES (18, 'Frau Blücher', 'Target', 70.0, FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (1 * 24 * 60 * 60 * 1000 + 10 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss')); +INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TS) VALUES (7, 'Victor von Frankenstein', 'Verizon', 100.0, FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (1 * 24 * 60 * 60 * 1000 + 11 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss')); +INSERT INTO transactions (TXN_ID, USERNAME, RECIPIENT, AMOUNT, TS) VALUES (19, 'Frau Blücher', 'Goodwill', 7.0, FORMAT_TIMESTAMP(FROM_UNIXTIME(UNIX_TIMESTAMP() - (1 * 24 * 60 * 60 * 1000 + 12 * 60 * 1000)),'yyyy-MM-dd HH:mm:ss')); diff --git a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/print-accounts-to-monitor.log b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/print-accounts-to-monitor.log index 26dbed23bb..df2f92cf25 100644 --- a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/print-accounts-to-monitor.log +++ b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/print-accounts-to-monitor.log @@ -1,6 +1,6 @@ -+-------------------------+-------------------------+-------------------------+ -|USERNAME |WINDOW_START |WINDOW_END | -+-------------------------+-------------------------+-------------------------+ -|Abby Normal |2020-10-20 00:00:00 +0000|2020-10-21 00:00:00 +0000| ++-------------------------+-------------------------+-------------------------+-------------------------+-------------------------+ +|USERNAME |WINDOWSTART |WINDOWEND |WINDOW_START |WINDOW_END | ++-------------------------+-------------------------+-------------------------+-------------------------+-------------------------+ +|Abby Normal |1666137600000 |1666224000000 |2022-10-19 00:00:00 +0000|2022-10-20 00:00:00 +0000| Limit 
Reached Query terminated diff --git a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/print-accounts-to-monitor.sql b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/print-accounts-to-monitor.sql index 3580fca492..a0b8163177 100644 --- a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/print-accounts-to-monitor.sql +++ b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/print-accounts-to-monitor.sql @@ -1,4 +1,4 @@ -SELECT USERNAME, WINDOW_START, WINDOW_END +SELECT * FROM ACCOUNTS_TO_MONITOR EMIT CHANGES LIMIT 1; diff --git a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/test/run-tests.sh b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/test/run-tests.sh index 9948a4cb9d..19cbea0db2 100644 --- a/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/test/run-tests.sh +++ b/_includes/tutorials/anomaly-detection/ksql/code/tutorial-steps/test/run-tests.sh @@ -1 +1 @@ -docker exec ksqldb-cli ksql-test-runner -s /opt/app/src/test-statements.sql -o /opt/app/test/output.json +docker exec ksqldb-cli ksql-test-runner -i /opt/app/test/input.json -s /opt/app/src/statements.sql -o /opt/app/test/output.json diff --git a/_includes/tutorials/anomaly-detection/ksql/markup/answer/join-and-windowing.adoc b/_includes/tutorials/anomaly-detection/ksql/markup/answer/join-and-windowing.adoc index 9e64e0b6d6..64caa0bccd 100644 --- a/_includes/tutorials/anomaly-detection/ksql/markup/answer/join-and-windowing.adoc +++ b/_includes/tutorials/anomaly-detection/ksql/markup/answer/join-and-windowing.adoc @@ -1,4 +1,4 @@ -Assuming transaction events are joined with table reference data, use `WINDOWING` to group anomalous transactions. +Assuming transaction events are joined with table reference data, use windowing to group anomalous transactions. +++++
{% include_raw tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-accounts-to-monitor-table.sql %}
diff --git a/_includes/tutorials/anomaly-detection/ksql/markup/dev/create-accounts-to-monitor-table.adoc b/_includes/tutorials/anomaly-detection/ksql/markup/dev/create-accounts-to-monitor-table.adoc
index a8e321f365..440fa6cbbc 100644
--- a/_includes/tutorials/anomaly-detection/ksql/markup/dev/create-accounts-to-monitor-table.adoc
+++ b/_includes/tutorials/anomaly-detection/ksql/markup/dev/create-accounts-to-monitor-table.adoc
@@ -1,4 +1,4 @@
-For this use case, let's say that a single transaction to one of the companies in the suspicious_names table is probably okay, but multiple transactions to one or more of those companies in a 24-hour period is an anomaly. ksqlDB gives us the ability to see if any anomalies are present for a particular user with the following query. Create the accounts_to_monitor table by copying and pasting the content below into the ksqlDB CLI.
+For this use case, let's say that a single transaction to one of the companies in the `suspicious_names` table is probably okay, but more than three transactions to one or more of those companies within a 24-hour period indicate an anomaly. ksqlDB gives us the ability to see if any anomalies are present for a particular user with the following query. Create the `accounts_to_monitor` table by copying and pasting the content below into the ksqlDB CLI.
[source,sql]
----
@@ -14,8 +14,8 @@ CREATE TABLE accounts_to_monitor
----
+++++
1 The fields `WINDOW_START` and `WINDOW_END` tell us what interval of time suspicious activity occurred.
+1 The fields WINDOW_START and WINDOW_END tell us the interval of time in which the suspicious activity occurred.
2 The WINDOW TUMBLING part of the query allows us to do an aggregation with distinct time boundaries. In this case our window is fixed at a length of 24 hours, does not allow gaps, and does not allow overlapping. Other types of windows are explained in the "Collect data over time" section of Kafka Tutorials. For more in-depth descriptions and visualizations, checkout the ksqlDB documentation about windows.
3 The last two lines of the query address how you would determine if a user had multiple suspicious transactions. This aspect of the query says, in essence, if any user has greater than 3 suspicious transactions during the window, emit an event to the accounts_to_monitor table.
+3 The last two lines of the query address how you would determine whether a user had multiple suspicious transactions. In essence, this part of the query says: if any user has more than 3 suspicious transactions within a single 24-hour window, emit an event to the accounts_to_monitor table.
{% include_raw tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-suspicious-names-table.sql %}
+++++
-A table is more fitting for this suspicious names data because it is a mutable collection that changes over time. We may want to add company names to this table or remove them in the future.
+A table is more fitting than a stream for the suspicious names data because it is a mutable collection that changes over time. We may want to add company names to this table or remove them in the future.
diff --git a/_includes/tutorials/anomaly-detection/ksql/markup/dev/create-suspicious-transactions-stream.adoc b/_includes/tutorials/anomaly-detection/ksql/markup/dev/create-suspicious-transactions-stream.adoc
index 79761d3026..d7392902b9 100644
--- a/_includes/tutorials/anomaly-detection/ksql/markup/dev/create-suspicious-transactions-stream.adoc
+++ b/_includes/tutorials/anomaly-detection/ksql/markup/dev/create-suspicious-transactions-stream.adoc
@@ -1,4 +1,4 @@
-Using the table of suspicious names and stream of transactions, create a new stream of events containing only those transactions that were sent to an account name contained in the 'suspicious_names' table. We can do this by performing an `INNER JOIN`. In this case the `INNER JOIN` will couple events in the transaction stream where the "recipient" is the same as "company_name" in the suspicious_names table. The stream created below will continuously be populated by the coupled events created by the query.
+Using the table of suspicious names and stream of transactions, create a new stream of events containing only those transactions that were sent to an account name contained in the `suspicious_names` table. We can do this by performing an `INNER JOIN`. In this case the `INNER JOIN` will couple events in the transaction stream where the `RECIPIENT` is the same as `COMPANY_NAME` in the `suspicious_names` table. The stream created below will continuously be populated by the coupled events emitted by the query.
+++++
{% include_raw tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-suspicious-transactions-stream.sql %}
diff --git a/_includes/tutorials/anomaly-detection/ksql/markup/dev/create-transactions-stream.adoc b/_includes/tutorials/anomaly-detection/ksql/markup/dev/create-transactions-stream.adoc
index 0047a2a917..69da4d0b59 100644
--- a/_includes/tutorials/anomaly-detection/ksql/markup/dev/create-transactions-stream.adoc
+++ b/_includes/tutorials/anomaly-detection/ksql/markup/dev/create-transactions-stream.adoc
@@ -1,4 +1,4 @@
-Likewise, you'll need a ksqlDB stream and Kafka topic to represent transaction events. The transaction information includes the identifier, the user sending the money, the name of the recipient, the amount of money sent, and the time of the transaction. Since this data represents a historical sequence of events, a stream is most appropriate.
+Likewise, you'll need a ksqlDB stream and Kafka topic to represent transaction events. The transaction information includes the identifier, the user sending the money, the name of the recipient, the amount of money sent, and the time of the transaction. Since this data represents a historical sequence of events, a stream is more appropriate than a table.
+++++
{% include_raw tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/create-transactions-stream.sql %}
diff --git a/_includes/tutorials/anomaly-detection/ksql/markup/dev/find-suspicious-transactions.adoc b/_includes/tutorials/anomaly-detection/ksql/markup/dev/find-suspicious-transactions.adoc
index 50c1cb2d65..3950690aaf 100644
--- a/_includes/tutorials/anomaly-detection/ksql/markup/dev/find-suspicious-transactions.adoc
+++ b/_includes/tutorials/anomaly-detection/ksql/markup/dev/find-suspicious-transactions.adoc
@@ -4,7 +4,7 @@ Inspect the new stream.
{% include_raw tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/find-suspicious-transactions.sql %}
+++++
-Note that some of the transactions we inserted earlier were to companies that are in the suspicious names table.
+Note that some of the transactions that we inserted earlier were to companies that are in the `suspicious_names` table.
+++++
{% include_raw tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/find-suspicious-transactions-output.log %}
diff --git a/_includes/tutorials/anomaly-detection/ksql/markup/dev/make-src-file.adoc b/_includes/tutorials/anomaly-detection/ksql/markup/dev/make-src-file.adoc
index 9f9ce848b5..9e5f3b478e 100644
--- a/_includes/tutorials/anomaly-detection/ksql/markup/dev/make-src-file.adoc
+++ b/_includes/tutorials/anomaly-detection/ksql/markup/dev/make-src-file.adoc
@@ -1,5 +1,5 @@
-Now that you have a series of statements that's doing the right thing, the last step is to put them into a file so that they can be used outside the CLI session. Create a file at `src/test-statements.sql` with the following content that represents the suspicious names (In production, you would likely use Kafka Connect to read the suspicious names from a database into a Kafka topic, and then create a ksqlDB stream for it).
+Now that you have a series of statements that's doing the right thing, the last step is to put them into a file so that they can be used outside the CLI session. Create a file at `src/statements.sql` with the following content that represents the suspicious names. (In production, you would likely use Kafka Connect to read the suspicious names from a database into a Kafka topic, and then create a ksqlDB stream for it.)
+++++
-{% include_raw tutorials/anomaly-detection/ksql/code/src/test-statements.sql %}
+{% include_raw tutorials/anomaly-detection/ksql/code/src/statements.sql %}
+++++
diff --git a/_includes/tutorials/anomaly-detection/ksql/markup/dev/populate-suspicious-names.adoc b/_includes/tutorials/anomaly-detection/ksql/markup/dev/populate-suspicious-names.adoc
index 8790c5feef..2a7b0e4432 100644
--- a/_includes/tutorials/anomaly-detection/ksql/markup/dev/populate-suspicious-names.adoc
+++ b/_includes/tutorials/anomaly-detection/ksql/markup/dev/populate-suspicious-names.adoc
@@ -1,4 +1,4 @@
-Let's add some suspicious names data into our reference table.
+Let's add some suspicious names data into our reference table. Note that the timestamps for these records are between 3 and 5 days ago.
+++++
{% include_raw tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/populate-suspicious-names.sql %}
diff --git a/_includes/tutorials/anomaly-detection/ksql/markup/dev/populate-transactions.adoc b/_includes/tutorials/anomaly-detection/ksql/markup/dev/populate-transactions.adoc
index 376f812663..b97708f0a4 100644
--- a/_includes/tutorials/anomaly-detection/ksql/markup/dev/populate-transactions.adoc
+++ b/_includes/tutorials/anomaly-detection/ksql/markup/dev/populate-transactions.adoc
@@ -1,4 +1,4 @@
-Let's add some transaction data into our event stream.
+Let's add some transaction data into our event stream. Note that the timestamps for these transactions are all within the past day, i.e., _after_ the timestamps of the suspicious name records.
+++++
{% include_raw tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/populate-transactions.sql %}
diff --git a/_includes/tutorials/anomaly-detection/ksql/markup/dev/print-accounts-to-monitor.adoc b/_includes/tutorials/anomaly-detection/ksql/markup/dev/print-accounts-to-monitor.adoc
index 151b9b722f..532e0be3b4 100644
--- a/_includes/tutorials/anomaly-detection/ksql/markup/dev/print-accounts-to-monitor.adoc
+++ b/_includes/tutorials/anomaly-detection/ksql/markup/dev/print-accounts-to-monitor.adoc
@@ -10,9 +10,9 @@ The output should look like the following:
{% include_raw tutorials/anomaly-detection/ksql/code/tutorial-steps/dev/print-accounts-to-monitor.log %}
+++++
-Note that if you were to alter the `LIMIT` of results to something greater than 1, you would not see any other accounts flagged even though Victor von Frankenstein had a transaction that was flagged as suspicious. If you decided to rerun the query with a new limit, use `CTRL+D` to terminate the query. +
+Note that if you were to alter the `LIMIT` of results to something greater than 1, you would not see any other accounts flagged even though Victor von Frankenstein had a transaction that was flagged as suspicious. If you decide to rerun the query with a new limit, enter `Ctrl+D` to terminate the query. +
-Events within the Kafka topic accounts_to_monitor can be used to drive monitoring and alerting applications that could take action such as placing a hold on the account, notifying the card holder, etc. +
+Events within the Kafka topic `accounts_to_monitor` can be used to drive monitoring and alerting applications that could take action such as placing a hold on the account, notifying the card holder, etc. +
-Type 'exit' and hit enter to shutdown the ksqlDB cli.
+Type 'exit' and hit enter to shut down the ksqlDB CLI.
diff --git a/_includes/tutorials/anomaly-detection/ksql/markup/prod/submit-to-api.adoc b/_includes/tutorials/anomaly-detection/ksql/markup/prod/submit-to-api.adoc
index 45b56c20f5..f6dc51fbb2 100644
--- a/_includes/tutorials/anomaly-detection/ksql/markup/prod/submit-to-api.adoc
+++ b/_includes/tutorials/anomaly-detection/ksql/markup/prod/submit-to-api.adoc
@@ -1,9 +1,3 @@
-Create a file at `src/statements.sql` with the following content that represents the statements we tested above without the test data.
-
-+++++
-{% include_raw tutorials/anomaly-detection/ksql/code/src/statements.sql %}
-+++++
-
Launch your statements into production by sending them to the REST API with the following command:
+++++
diff --git a/_includes/tutorials/anomaly-detection/ksql/markup/test/make-test-output.adoc b/_includes/tutorials/anomaly-detection/ksql/markup/test/make-test-output.adoc
index 503dd371b3..52abad045a 100644
--- a/_includes/tutorials/anomaly-detection/ksql/markup/test/make-test-output.adoc
+++ b/_includes/tutorials/anomaly-detection/ksql/markup/test/make-test-output.adoc
@@ -1,4 +1,4 @@
-Create a file at `test/output.json` with the expected outputs. ksqlDB joins its grouping key with the window boundaries, we need to use a bit of extra expression to describe what to expect. We leverage the window key to describe the start and end boundaries that the key represents. Checkout our tutorial on link:{{ "create-tumbling-windows/ksql.html" | relative_url }}[*tumbling windows*] for a more comprehensive explanation.
+Create a file at `test/output.json` with the expected outputs. Notice that because ksqlDB joins its grouping key with the window boundaries, we need to use a bit of extra expression to describe what to expect. We leverage the window key to describe the start and end boundaries that the key represents. Check out our tutorial on link:{{ "create-tumbling-windows/ksql.html" | relative_url }}[*tumbling windows*] for a more comprehensive explanation.
+++++
{% include_raw tutorials/anomaly-detection/ksql/code/test/output.json %}
diff --git a/_includes/tutorials/deserialization-errors/ksql/code/tutorial-steps/dev/wait-for-containers.sh b/_includes/tutorials/deserialization-errors/ksql/code/tutorial-steps/dev/wait-for-containers.sh
index 493cc9446b..72dfa0c94a 100755
--- a/_includes/tutorials/deserialization-errors/ksql/code/tutorial-steps/dev/wait-for-containers.sh
+++ b/_includes/tutorials/deserialization-errors/ksql/code/tutorial-steps/dev/wait-for-containers.sh
@@ -1,3 +1,3 @@
while [ $(curl -s -o /dev/null -w %{http_code} http://localhost:8088/) -eq 000 ] ; do sleep 5 ; done;
-# Back off for KSQL server to get out of the initialization phase.
+# Back off so ksqlDB can complete the initialization phase.
sleep 5
diff --git a/_includes/tutorials/hopping-windows/ksql/code/tutorial-steps/dev/wait-for-containers.sh b/_includes/tutorials/hopping-windows/ksql/code/tutorial-steps/dev/wait-for-containers.sh
index 493cc9446b..72dfa0c94a 100755
--- a/_includes/tutorials/hopping-windows/ksql/code/tutorial-steps/dev/wait-for-containers.sh
+++ b/_includes/tutorials/hopping-windows/ksql/code/tutorial-steps/dev/wait-for-containers.sh
@@ -1,3 +1,3 @@
while [ $(curl -s -o /dev/null -w %{http_code} http://localhost:8088/) -eq 000 ] ; do sleep 5 ; done;
-# Back off for KSQL server to get out of the initialization phase.
+# Back off so ksqlDB can complete the initialization phase.
sleep 5
diff --git a/_includes/tutorials/joining-stream-stream/ksql/markup/dev/print-output-topic.adoc b/_includes/tutorials/joining-stream-stream/ksql/markup/dev/print-output-topic.adoc
index be342f61d4..9e71b39198 100644
--- a/_includes/tutorials/joining-stream-stream/ksql/markup/dev/print-output-topic.adoc
+++ b/_includes/tutorials/joining-stream-stream/ksql/markup/dev/print-output-topic.adoc
@@ -11,4 +11,4 @@ This should yield the following output:
As you can see, the output sits in a plain Kafka topic and therefore, any application that is able to consume data from it will be able to have access to this data.
-Type 'exit' and hit enter to exit the ksqlDB cli.
\ No newline at end of file
+Type 'exit' and hit enter to exit the ksqlDB CLI.
\ No newline at end of file
diff --git a/_includes/tutorials/joining-stream-table/ksql/markup/dev/print-output-topic.adoc b/_includes/tutorials/joining-stream-table/ksql/markup/dev/print-output-topic.adoc
index 20c172b03a..e954447c43 100644
--- a/_includes/tutorials/joining-stream-table/ksql/markup/dev/print-output-topic.adoc
+++ b/_includes/tutorials/joining-stream-table/ksql/markup/dev/print-output-topic.adoc
@@ -10,4 +10,4 @@ This should yield the following output:
{% include_raw tutorials/joining-stream-table/ksql/code/tutorial-steps/dev/expected-print.log %}
+++++
-Type 'exit' and hit enter to exit the ksqlDB cli.
\ No newline at end of file
+Type 'exit' and hit enter to exit the ksqlDB CLI.
\ No newline at end of file
diff --git a/_includes/tutorials/joining-table-table/ksql/markup/dev/print-output-topic.adoc b/_includes/tutorials/joining-table-table/ksql/markup/dev/print-output-topic.adoc
index 75e290adda..24ae52da5a 100644
--- a/_includes/tutorials/joining-table-table/ksql/markup/dev/print-output-topic.adoc
+++ b/_includes/tutorials/joining-table-table/ksql/markup/dev/print-output-topic.adoc
@@ -9,4 +9,4 @@ This should yield the following output:
{% include_raw tutorials/joining-table-table/ksql/code/tutorial-steps/dev/expected-print.log %}
+++++
-Type 'exit' and hit enter to exit the ksqlDB cli.
\ No newline at end of file
+Type 'exit' and hit enter to exit the ksqlDB CLI.
\ No newline at end of file
diff --git a/_includes/tutorials/masking-data/ksql/markup/dev/print-purchases-obfuscated-topic.adoc b/_includes/tutorials/masking-data/ksql/markup/dev/print-purchases-obfuscated-topic.adoc
index 153a40e3aa..0d2b345645 100644
--- a/_includes/tutorials/masking-data/ksql/markup/dev/print-purchases-obfuscated-topic.adoc
+++ b/_includes/tutorials/masking-data/ksql/markup/dev/print-purchases-obfuscated-topic.adoc
@@ -23,4 +23,4 @@ In the example above, the following types of characters in `CUSTOMER_NAME` would
ksqlDB offers a variety of different masking functions that allow you to mask the farthest or nearest _x_ number of characters on right or left. Check out the https://docs.ksqldb.io/en/latest/developer-guide/ksqldb-reference/scalar-functions/#mask[*ksqlDB documentation*,window=_blank] for more information.
====
-Type 'exit' and hit enter to shutdown the ksqlDB cli.
+Type 'exit' and hit enter to shut down the ksqlDB CLI.
diff --git a/_includes/tutorials/session-windows/ksql/markup/test_it.html b/_includes/tutorials/session-windows/ksql/markup/test_it.html
index 015ba92b32..ad7bc21f6d 100644
--- a/_includes/tutorials/session-windows/ksql/markup/test_it.html
+++ b/_includes/tutorials/session-windows/ksql/markup/test_it.html
@@ -7,7 +7,7 @@ {% include_raw tutorials/session-windows/ksql/code/test/input.json %}
- Similarly, create a file at test/output.json with the expected outputs. Notice that because KSQL joins its grouping key with the window boundaries, we need to use a bit of extra expression to describe what to expect. We leverage the window key to describe the start and end boundaries that the key represents.
+ Similarly, create a file at test/output.json with the expected outputs. Notice that because ksqlDB joins its grouping key with the window boundaries, we need to use a bit of extra expression to describe what to expect. We leverage the window key to describe the start and end boundaries that the key represents.
{% include_raw tutorials/session-windows/ksql/code/test/output.json %}
diff --git a/_includes/tutorials/session-windows/ksql/markup/try_it.html b/_includes/tutorials/session-windows/ksql/markup/try_it.html
index 5a1be6757f..176e9dd115 100644
--- a/_includes/tutorials/session-windows/ksql/markup/try_it.html
+++ b/_includes/tutorials/session-windows/ksql/markup/try_it.html
@@ -4,7 +4,7 @@
{% include_raw tutorials/session-windows/ksql/code/tutorial-steps/dev/print-topic.sql %}
+++++
-Notice that the key for each message contains some strange characters that aren't quite printable. KSQL has combined the grouping key (IP address) with its window boundaries using a format that's not quite printable in this format. It should look something like this:
+Notice that the key for each message contains some strange characters that aren't quite printable. This occurs because ksqlDB has combined the grouping key (IP address) with its window boundaries using a format that's not quite printable here. It should look something like this:
+++++
{% include_raw tutorials/session-windows/ksql/code/tutorial-steps/dev/expected-print-topic.log %}