From 94ef56358daad39990887f79a3aad94c8e8786c7 Mon Sep 17 00:00:00 2001 From: Guillaume Clochard Date: Fri, 21 Jul 2023 14:43:05 +0200 Subject: [PATCH] working rss-to-tana --- rss-to-tana/.dockerignore | 3 ++ rss-to-tana/fly.toml | 16 +++++++++ rss-to-tana/index.js | 51 ++++++++++++++++---------- rss-to-tana/item.js | 75 ++++++++++++++++++++++++++------------- rss-to-tana/runner.js | 24 +++++++++++++ rss-to-tana/tana.js | 10 +++--- 6 files changed, 130 insertions(+), 49 deletions(-) create mode 100644 rss-to-tana/.dockerignore create mode 100644 rss-to-tana/fly.toml create mode 100644 rss-to-tana/runner.js diff --git a/rss-to-tana/.dockerignore b/rss-to-tana/.dockerignore new file mode 100644 index 0000000..b015d1e --- /dev/null +++ b/rss-to-tana/.dockerignore @@ -0,0 +1,3 @@ +# flyctl launch added from .gitignore +**/node_modules +fly.toml diff --git a/rss-to-tana/fly.toml b/rss-to-tana/fly.toml new file mode 100644 index 0000000..a01b1d7 --- /dev/null +++ b/rss-to-tana/fly.toml @@ -0,0 +1,16 @@ +# fly.toml app configuration file generated for rss-to-tana on 2023-07-21T12:04:26+02:00 +# +# See https://fly.io/docs/reference/configuration/ for information about how to use this file. +# + +app = "rss-to-tana" +primary_region = "cdg" + +[build] + builder = "heroku/buildpacks:20" + +[env] + NODE_ENV = "production" + +[processes] + rss = "npm start" diff --git a/rss-to-tana/index.js b/rss-to-tana/index.js index 77be89f..6fef390 100644 --- a/rss-to-tana/index.js +++ b/rss-to-tana/index.js @@ -1,7 +1,8 @@ const RSSParser = require('rss-parser'); const cron = require('node-cron'); -const { handleNewRSSItem } = require('./item'); +const { saveItem } = require('./item'); +const { run } = require('./runner') const Tana = require('./tana'); const parser = new RSSParser(); @@ -9,41 +10,53 @@ const parser = new RSSParser(); const rssFeeds = [ { url: 'https://lesoreillescurieuses.com/feed/', - // cron: '0 0 23,6 * * *', // 23:00 and 06:00 every day - cron: '0 * * * * *', // 23:00 and 06:00 every day + cron: '0 0 23,6 * * *', // 23:00 and 06:00 every day toTana: Tana.album, }, { url: 'https://cmd.wuips.com/rss/feed.xml', - // cron: '0 0 * * * *', // every hour every day - cron: '0 * * * * *', // every hour every day + cron: '0 0 * * * *', // every hour every day toTana: Tana.website, }, ]; -const startTime = new Date('2022-09-27 18:00:00.000.'); +function parseFeed(feed) { + return async function (lastRunDate) { + try { + console.log(feed.url, `parsing for items published after ${lastRunDate.toISOString()}`) + const parsedFeed = await parser.parseURL(feed.url); -async function parseFeed(feed) { - try { - console.log(feed.url, 'parsing') - const parsedFeed = await parser.parseURL(feed.url); + for (const item of parsedFeed.items) { + const pubDate = new Date(item.pubDate); + if (pubDate > lastRunDate) { + console.log(feed.url, `new ${item.title} detected`); - for (const item of parsedFeed.items) { - const pubDate = new Date(item.pubDate); - if (pubDate > startTime) { - const tanaNode = feed.toTana(item) - handleNewRSSItem(feed.url, item, tanaNode); + const tanaNode = feed.toTana(item) + saveItem(tanaNode); + } } + } catch (error) { + console.error(feed.url, `parsing error`, error); } - } catch (error) { - console.error(feed.url, `parsing error`, error); } } for (const feed of rssFeeds) { - console.log('Scheduling', feed.url, 'on', feed.cron) + /** + * We can use FORCE=true env var to run the feeds parsing directly, without + * cron schedule + */ + if (process.env.FORCE === 'true') { + run(parseFeed(feed))() + } else { + console.log('Scheduling', feed.url, 'on', feed.cron) + + if (!cron.validate(feed.cron)) { + throw new Error(`${feed.cron} not a valid cron expression`) + } - cron.schedule(feed.cron, () => parseFeed(feed)) + cron.schedule(feed.cron, run(parseFeed(feed))) + } } diff --git a/rss-to-tana/item.js b/rss-to-tana/item.js index 63dc162..c8e5dd9 100644 --- a/rss-to-tana/item.js +++ b/rss-to-tana/item.js @@ -1,25 +1,26 @@ -function handleNewRSSItem(feedURL, item, node) { - console.log(feedURL, `new ${item.title} detected`); +const API_KEY = process.env.TANA_API_KEY +function postNodes(nodes) { + // Sending all given nodes at once as we think we won't have more than 100 + // nodes here + // @see https://github.com/tanainc/tana-input-api-samples + // + // We're also adding the #inbox super tag on all node const payload = { targetNodeId: 'INBOX', - nodes: [ - { - ...node, - supertags: [ - ...node.supertags, - { - /* inbox */ - id: 'hNwXd-0aYDVj' - } - ] - } - ] + nodes: nodes.map(node => ({ + ...node, + supertags: [ + ...node.supertags, + { + /* inbox */ + id: 'hNwXd-0aYDVj' + } + ] + })) }; - const API_KEY = process.env.TANA_API_KEY - - fetch('https://europe-west1-tagr-prod.cloudfunctions.net/addToNodeV2', { + return fetch('https://europe-west1-tagr-prod.cloudfunctions.net/addToNodeV2', { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -28,15 +29,39 @@ function handleNewRSSItem(feedURL, item, node) { body: JSON.stringify(payload) }) .then(response => { - if (response.ok) { - console.log(feedURL, `${item.title} saved`); - } else { - console.error(feedURL, `error saving ${item.title} item: ${response.status} ${response.statusText}`); + if (!response.ok) { + throw new Error(`Error saving nodes: ${response.status} ${response.statusText}`) } }) - .catch(error => { - console.error(feedURL, 'error making HTTP POST request', error); - }); } -module.exports = { handleNewRSSItem }; +const queue = [] + +// every 20s, we post the queue +setInterval( + () => { + if (queue.length) { + console.log(`Posting ${queue.length} items to Tana`) + + // extracting all items from the queue + const nodes = queue.splice(0, Infinity) + + postNodes(nodes) + .then(() => { + console.log(`${nodes.length} nodes saved`); + }) + // in case of failure, we put back items in the queue + .catch(error => { + console.error(error); + queue.push(...nodes) + }); + } + }, + 20 * 1000 +) + +function saveItem(node) { + queue.push(node) +} + +module.exports = { saveItem }; diff --git a/rss-to-tana/runner.js b/rss-to-tana/runner.js new file mode 100644 index 0000000..8b9d1fc --- /dev/null +++ b/rss-to-tana/runner.js @@ -0,0 +1,24 @@ +// starting 1 minute before deployment time to handle potential downtime +const processStartTimeMs = Date.now() - 1 * 60 * 1000; + +/** + * We can pass an optional START_TIME env var to control the RSS parsing start + * date. Default is process start time minus one minute. + * + * Iso date, eg. 2023-06-27T18:00:00.000Z + */ +const startTime = new Date(process.env.START_TIME || processStartTimeMs); + +console.log(`Using ${startTime.toISOString()} as start time`) + +function run(callback) { + let lastRunDate = startTime; + + return function () { + const now = new Date() + callback(lastRunDate) + lastRunDate = now + } +} + +module.exports = { run } diff --git a/rss-to-tana/tana.js b/rss-to-tana/tana.js index ee600eb..7d26a9e 100644 --- a/rss-to-tana/tana.js +++ b/rss-to-tana/tana.js @@ -1,6 +1,6 @@ function album(item) { return { - name: '', + name: item.title, supertags: [ { /* Album */ @@ -14,7 +14,7 @@ function album(item) { attributeId: 'ksBOEhsvfu', children: [ { - name: item.title + name: item.title, } ] }, @@ -24,7 +24,7 @@ function album(item) { attributeId: 'SalqarOgiv', children: [ { - name: item.url + name: item.link } ] } @@ -45,11 +45,11 @@ function website(item) { { /* URL */ type: 'field', - attributeId: 'SYS_A78', + attributeId: 'S4UUISQkxn2X', children: [ { dataType: 'url', - name: item.url + name: item.link } ] },