Skip to content

Commit

Permalink
working rss-to-tana
Browse files Browse the repository at this point in the history
  • Loading branch information
guillaumewuip committed Jul 21, 2023
1 parent f221303 commit 94ef563
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 49 deletions.
3 changes: 3 additions & 0 deletions rss-to-tana/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# flyctl launch added from .gitignore
**/node_modules
fly.toml
16 changes: 16 additions & 0 deletions rss-to-tana/fly.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# fly.toml app configuration file generated for rss-to-tana on 2023-07-21T12:04:26+02:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#

app = "rss-to-tana"
primary_region = "cdg"

[build]
builder = "heroku/buildpacks:20"

[env]
NODE_ENV = "production"

[processes]
rss = "npm start"
51 changes: 32 additions & 19 deletions rss-to-tana/index.js
Original file line number Diff line number Diff line change
@@ -1,49 +1,62 @@
const RSSParser = require('rss-parser');
const cron = require('node-cron');

const { handleNewRSSItem } = require('./item');
const { saveItem } = require('./item');
const { run } = require('./runner')
const Tana = require('./tana');

const parser = new RSSParser();

const rssFeeds = [
{
url: 'https://lesoreillescurieuses.com/feed/',
// cron: '0 0 23,6 * * *', // 23:00 and 06:00 every day
cron: '0 * * * * *', // 23:00 and 06:00 every day
cron: '0 0 23,6 * * *', // 23:00 and 06:00 every day
toTana: Tana.album,
},
{
url: 'https://cmd.wuips.com/rss/feed.xml',
// cron: '0 0 * * * *', // every hour every day
cron: '0 * * * * *', // every hour every day
cron: '0 0 * * * *', // every hour every day
toTana: Tana.website,
},

];

const startTime = new Date('2022-09-27 18:00:00.000.');
function parseFeed(feed) {
return async function (lastRunDate) {
try {
console.log(feed.url, `parsing for items published after ${lastRunDate.toISOString()}`)
const parsedFeed = await parser.parseURL(feed.url);

async function parseFeed(feed) {
try {
console.log(feed.url, 'parsing')
const parsedFeed = await parser.parseURL(feed.url);
for (const item of parsedFeed.items) {
const pubDate = new Date(item.pubDate);
if (pubDate > lastRunDate) {
console.log(feed.url, `new ${item.title} detected`);

for (const item of parsedFeed.items) {
const pubDate = new Date(item.pubDate);
if (pubDate > startTime) {
const tanaNode = feed.toTana(item)
handleNewRSSItem(feed.url, item, tanaNode);
const tanaNode = feed.toTana(item)
saveItem(tanaNode);
}
}
} catch (error) {
console.error(feed.url, `parsing error`, error);
}
} catch (error) {
console.error(feed.url, `parsing error`, error);
}
}


for (const feed of rssFeeds) {
console.log('Scheduling', feed.url, 'on', feed.cron)
/**
* We can use FORCE=true env var to run the feeds parsing directly, without
* cron schedule
*/
if (process.env.FORCE === 'true') {
run(parseFeed(feed))()
} else {
console.log('Scheduling', feed.url, 'on', feed.cron)

if (!cron.validate(feed.cron)) {
throw new Error(`${feed.cron} not a valid cron expression`)
}

cron.schedule(feed.cron, () => parseFeed(feed))
cron.schedule(feed.cron, run(parseFeed(feed)))
}
}
75 changes: 50 additions & 25 deletions rss-to-tana/item.js
Original file line number Diff line number Diff line change
@@ -1,25 +1,26 @@
function handleNewRSSItem(feedURL, item, node) {
console.log(feedURL, `new ${item.title} detected`);
const API_KEY = process.env.TANA_API_KEY

function postNodes(nodes) {
// Sending all given nodes at once as we think we won't have more than 100
// nodes here
// @see https://github.com/tanainc/tana-input-api-samples
//
// We're also adding the #inbox supertag to every node
const payload = {
targetNodeId: 'INBOX',
nodes: [
{
...node,
supertags: [
...node.supertags,
{
/* inbox */
id: 'hNwXd-0aYDVj'
}
]
}
]
nodes: nodes.map(node => ({
...node,
supertags: [
...node.supertags,
{
/* inbox */
id: 'hNwXd-0aYDVj'
}
]
}))
};

const API_KEY = process.env.TANA_API_KEY

fetch('https://europe-west1-tagr-prod.cloudfunctions.net/addToNodeV2', {
return fetch('https://europe-west1-tagr-prod.cloudfunctions.net/addToNodeV2', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Expand All @@ -28,15 +29,39 @@ function handleNewRSSItem(feedURL, item, node) {
body: JSON.stringify(payload)
})
.then(response => {
if (response.ok) {
console.log(feedURL, `${item.title} saved`);
} else {
console.error(feedURL, `error saving ${item.title} item: ${response.status} ${response.statusText}`);
if (!response.ok) {
throw new Error(`Error saving nodes: ${response.status} ${response.statusText}`)
}
})
.catch(error => {
console.error(feedURL, 'error making HTTP POST request', error);
});
}

module.exports = { handleNewRSSItem };
/**
 * Nodes waiting to be sent to Tana. Filled by saveItem() and drained by the
 * periodic flush below.
 */
const queue = []

// Largest batch sent in a single request. postNodes() sends everything it is
// given in one call and is written on the assumption that it never receives
// more than 100 nodes, so the cap is enforced here. Without it, a batch that
// fails is put back in the queue and the next attempt only gets bigger.
// NOTE(review): this counts top-level nodes only — confirm whether the API
// limit also counts the field/children nodes nested inside each one.
const MAX_NODES_PER_POST = 100

// every 20s, we post at most one batch taken from the queue
setInterval(
  () => {
    if (queue.length === 0) {
      return
    }

    // extracting one batch from the front of the queue; anything beyond the
    // cap stays queued and is sent on a following tick
    const nodes = queue.splice(0, MAX_NODES_PER_POST)

    console.log(`Posting ${nodes.length} items to Tana`)

    postNodes(nodes)
      .then(() => {
        console.log(`${nodes.length} nodes saved`);
      })
      // in case of failure, we put the batch back at the FRONT of the queue so
      // the nodes keep their original order and are retried first
      .catch(error => {
        console.error(error);
        queue.unshift(...nodes)
      });
  },
  20 * 1000
)

/**
 * Schedule a node for upload.
 *
 * The node is only appended to the in-memory `queue`; nothing is sent from
 * here. The actual upload happens when the queue is next flushed.
 *
 * @param {object} node - node to enqueue; stored as-is, not copied
 * @returns {void}
 */
function saveItem(node) {
  queue.push(node);
}

module.exports = { saveItem };
24 changes: 24 additions & 0 deletions rss-to-tana/runner.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// starting 1 minute before the process start time to handle potential downtime
const processStartTimeMs = Date.now() - 1 * 60 * 1000;

/**
 * Build the start date from the raw START_TIME value.
 *
 * An unset or empty value falls back to `fallbackMs`. A value that cannot be
 * parsed is rejected here with an explicit message: otherwise it would only
 * surface later as a bare "Invalid time value" error from `toISOString()`.
 *
 * @param {string | undefined} rawStartTime - value of the START_TIME env var
 * @param {number} fallbackMs - epoch milliseconds used when no value is given
 * @returns {Date} the resolved, valid start date
 * @throws {RangeError} when the resulting date is invalid
 */
function resolveStartTime(rawStartTime, fallbackMs) {
  const resolved = new Date(rawStartTime || fallbackMs);

  if (Number.isNaN(resolved.getTime())) {
    throw new RangeError(
      `Invalid START_TIME "${rawStartTime}": expected an ISO date, eg. 2023-06-27T18:00:00.000Z`
    );
  }

  return resolved;
}

/**
 * We can pass an optional START_TIME env var to control the RSS parsing start
 * date. Default is process start time minus one minute.
 *
 * Iso date, eg. 2023-06-27T18:00:00.000Z
 */
const startTime = resolveStartTime(process.env.START_TIME, processStartTimeMs);

console.log(`Using ${startTime.toISOString()} as start time`)

/**
 * Wrap `callback` so that every invocation is told when the previous one
 * happened.
 *
 * The first invocation receives the module-level `startTime`. Each later
 * invocation receives the instant at which the preceding invocation started.
 * Every wrapper returned by `run` tracks its own date independently.
 *
 * If `callback` throws synchronously, the error propagates and the stored
 * date is left untouched, so the next invocation covers the same period again.
 *
 * @param {(lastRunDate: Date) => unknown} callback - called with the date of
 *   the previous run
 * @returns {() => unknown} zero-argument function that returns whatever
 *   `callback` returned (a promise when `callback` is async)
 */
function run(callback) {
  let lastRunDate = startTime;

  return function () {
    // Taken before the callback runs, so the next invocation also covers the
    // time spent inside this one.
    const now = new Date();
    const result = callback(lastRunDate);
    lastRunDate = now;

    // Hand the result back instead of dropping it: for an async callback this
    // is the promise the caller needs in order to await it or observe a
    // rejection.
    return result;
  };
}

module.exports = { run }
10 changes: 5 additions & 5 deletions rss-to-tana/tana.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
function album(item) {
return {
name: '',
name: item.title,
supertags: [
{
/* Album */
Expand All @@ -14,7 +14,7 @@ function album(item) {
attributeId: 'ksBOEhsvfu',
children: [
{
name: item.title
name: item.title,
}
]
},
Expand All @@ -24,7 +24,7 @@ function album(item) {
attributeId: 'SalqarOgiv',
children: [
{
name: item.url
name: item.link
}
]
}
Expand All @@ -45,11 +45,11 @@ function website(item) {
{
/* URL */
type: 'field',
attributeId: 'SYS_A78',
attributeId: 'S4UUISQkxn2X',
children: [
{
dataType: 'url',
name: item.url
name: item.link
}
]
},
Expand Down

0 comments on commit 94ef563

Please sign in to comment.