From 665058fc847070203d894993ce5ace17f22194a6 Mon Sep 17 00:00:00 2001 From: bigint <69431456+bigint@users.noreply.github.com> Date: Mon, 6 Nov 2023 10:52:30 +0530 Subject: [PATCH] feat: migrate feeds to use clickhouse client --- packages/workers/feeds/.dev.vars.example | 2 +- packages/workers/feeds/package.json | 1 + .../hey/algorithms/heyMostInteracted.ts | 39 ++++++++++++------- .../providers/hey/algorithms/heyMostViewed.ts | 36 ++++++++++------- .../src/providers/hey/clickhouseQuery.ts | 27 ------------- packages/workers/feeds/src/types.ts | 2 +- packages/workers/feeds/wrangler.toml | 2 +- pnpm-lock.yaml | 3 ++ 8 files changed, 52 insertions(+), 60 deletions(-) delete mode 100644 packages/workers/feeds/src/providers/hey/clickhouseQuery.ts diff --git a/packages/workers/feeds/.dev.vars.example b/packages/workers/feeds/.dev.vars.example index b7b8e24e1c33..4264033d63b4 100644 --- a/packages/workers/feeds/.dev.vars.example +++ b/packages/workers/feeds/.dev.vars.example @@ -1 +1 @@ -CLICKHOUSE_REST_ENDPOINT="" +CLICKHOUSE_PASSWORD="" diff --git a/packages/workers/feeds/package.json b/packages/workers/feeds/package.json index 01fe4e9e6cdf..a86a4063a518 100644 --- a/packages/workers/feeds/package.json +++ b/packages/workers/feeds/package.json @@ -15,6 +15,7 @@ "worker:deploy": "wrangler deploy --var RELEASE:\"$(git rev-parse HEAD)\"" }, "dependencies": { + "@hey/clickhouse": "workspace:*", "@hey/data": "workspace:*", "@hey/lib": "workspace:*", "itty-router": "^4.0.23", diff --git a/packages/workers/feeds/src/providers/hey/algorithms/heyMostInteracted.ts b/packages/workers/feeds/src/providers/hey/algorithms/heyMostInteracted.ts index c100822b5d91..d2d880ebdff2 100644 --- a/packages/workers/feeds/src/providers/hey/algorithms/heyMostInteracted.ts +++ b/packages/workers/feeds/src/providers/hey/algorithms/heyMostInteracted.ts @@ -1,9 +1,9 @@ +import createClickhouseClient from '@hey/clickhouse/createClickhouseClient'; import { Errors } from '@hey/data/errors'; import { PUBLICATION } from '@hey/data/tracking'; import randomizeIds from '../../../helpers/randomizeIds'; import type { Env } from '../../../types'; -import clickhouseQuery from '../clickhouseQuery'; const interactionAndWeights = { [PUBLICATION.COLLECT_MODULE.COLLECT]: 10, @@ -38,32 +38,41 @@ const heyMostInteracted = async ( } try { - const query = ` - SELECT + const client = createClickhouseClient(env.CLICKHOUSE_PASSWORD); + const rows = await client.query({ + query: ` + SELECT JSONExtractString(properties, 'publication_id') AS publication_id, SUM(CASE ${generateWeightedCaseStatement()} ELSE 0 END) AS weighted_interaction_count - FROM - events - WHERE + FROM + events + WHERE name IN (${interactionEvents.map((name) => `'${name}'`).join(',')}) AND JSONHas(properties, 'publication_id') AND created >= now() - INTERVAL 1 DAY - GROUP BY + GROUP BY publication_id - HAVING + HAVING publication_id IS NOT NULL - AND + AND publication_id != '' - ORDER BY + ORDER BY weighted_interaction_count DESC - LIMIT ${limit} - OFFSET ${offset}; - `; - const response = await clickhouseQuery(query, env); - const ids = response.map((row) => row[0]); + LIMIT ${limit} + OFFSET ${offset}; + `, + format: 'JSONEachRow' + }); + + const result = + await rows.json< + Array<{ publication_id: string; weighted_interaction_count: number }> + >(); + + const ids = result.map((row) => row.publication_id); return randomizeIds(ids); } catch { diff --git a/packages/workers/feeds/src/providers/hey/algorithms/heyMostViewed.ts b/packages/workers/feeds/src/providers/hey/algorithms/heyMostViewed.ts index a92d68cc9958..8051d0313db2 100644 --- a/packages/workers/feeds/src/providers/hey/algorithms/heyMostViewed.ts +++ b/packages/workers/feeds/src/providers/hey/algorithms/heyMostViewed.ts @@ -1,10 +1,10 @@ +import createClickhouseClient from '@hey/clickhouse/createClickhouseClient'; import { Errors } from '@hey/data/errors'; import { PAGEVIEW } from '@hey/data/tracking'; import randomizeIds from '../../../helpers/randomizeIds'; import removeParamsFromString from '../../../helpers/removeParamsFromString'; import type { Env } from '../../../types'; -import clickhouseQuery from '../clickhouseQuery'; const heyMostViewed = async ( limit: number, @@ -16,28 +16,34 @@ const heyMostViewed = async ( } try { - const query = ` - SELECT + const client = createClickhouseClient(env.CLICKHOUSE_PASSWORD); + const rows = await client.query({ + query: ` + SELECT url, COUNT(*) AS view_count - FROM + FROM events - WHERE + WHERE name = '${PAGEVIEW}' AND url LIKE 'https://hey.xyz/posts/%' AND created >= now() - INTERVAL 1 DAY - GROUP BY + GROUP BY url - ORDER BY + ORDER BY view_count DESC - LIMIT ${limit} - OFFSET ${offset}; - `; - const response = await clickhouseQuery(query, env); - - const ids = response.map((row) => { - const url = row[0]; - const id = url.split('/').pop(); + LIMIT ${limit} + OFFSET ${offset}; + `, + format: 'JSONEachRow' + }); + + const result = + await rows.json>(); + + const ids = result.map((row) => { + const { url } = row; + const id = url.split('/').pop() || ''; return removeParamsFromString(id); }); diff --git a/packages/workers/feeds/src/providers/hey/clickhouseQuery.ts b/packages/workers/feeds/src/providers/hey/clickhouseQuery.ts deleted file mode 100644 index b3a3fa04b610..000000000000 --- a/packages/workers/feeds/src/providers/hey/clickhouseQuery.ts +++ /dev/null @@ -1,27 +0,0 @@ -import type { Env } from '../../types'; - -const clickhouseQuery = async (query: string, env: Env) => { - try { - const response = await fetch( - `${env.CLICKHOUSE_REST_ENDPOINT}&default_format=JSONCompact`, - { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - cf: { cacheTtl: 600, cacheEverything: true }, - body: query - } - ); - - if (response.status !== 200) { - return []; - } - - const json: { data: [any][] } = await response.json(); - - return json.data; - } catch { - return []; - } -}; - -export default clickhouseQuery; diff --git a/packages/workers/feeds/src/types.ts b/packages/workers/feeds/src/types.ts index 8a5cd739eafa..d5f0a4665db5 100644 --- a/packages/workers/feeds/src/types.ts +++ b/packages/workers/feeds/src/types.ts @@ -2,7 +2,7 @@ import type { IRequestStrict } from 'itty-router'; export interface Env { RELEASE: string; - CLICKHOUSE_REST_ENDPOINT: string; + CLICKHOUSE_PASSWORD: string; } export type WorkerRequest = { diff --git a/packages/workers/feeds/wrangler.toml b/packages/workers/feeds/wrangler.toml index 247fe869c57a..a35a6b134f85 100644 --- a/packages/workers/feeds/wrangler.toml +++ b/packages/workers/feeds/wrangler.toml @@ -10,4 +10,4 @@ routes = [ [env.production.vars] RELEASE = "" -CLICKHOUSE_REST_ENDPOINT = "" +CLICKHOUSE_PASSWORD = "" diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d9155913022d..e77932934167 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -605,6 +605,9 @@ importers: packages/workers/feeds: dependencies: + '@hey/clickhouse': + specifier: workspace:* + version: link:../../clickhouse '@hey/data': specifier: workspace:* version: link:../../data