Skip to content

Commit

Permalink
Issue Seneca-CDOT#22, Inactive Blog Filter
Browse files Browse the repository at this point in the history
  • Loading branch information
jerryshueh committed Nov 14, 2019
1 parent 70c1290 commit 3c522cc
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 6 deletions.
12 changes: 8 additions & 4 deletions env.example
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,15 @@ PORT=8080
# REDIS_URL specifies Redis server info
REDIS_URL=redis://127.0.0.1:6379

# NODEMAILER_USERNAME is sender's username credential
NODEMAILER_USERNAME=
# NODEMAILER_USERNAME is sender's username credential
NODEMAILER_USERNAME=

# NODEMAILER_PASSWORD is sender's password credential
# NODEMAILER_PASSWORD is sender's password credential
NODEMAILER_PASSWORD=

# NODEMAILER_SERVER is sender's server that they are using (Example: smtp.example.com)
NODEMAILER_SERVER=
NODEMAILER_SERVER=

# BLOG_INACTIVE_TIME is the period (days) of inactivity
# before a blog will be considered redlisted
BLOG_INACTIVE_TIME=360
30 changes: 30 additions & 0 deletions feeds-redlist.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[
{
"url": "http://ajhooper.blogspot.com/feeds/posts/default",
"lastUpdate": "2008-10-18T17:22:32.366Z"
},
{
"url": "http://KrazyDre.blogspot.com/feeds/posts/default?alt=rss",
"lastUpdate": null
},
{
"url": "http://nadavid.blogspot.com/feeds/posts/default",
"lastUpdate": "2008-09-07T22:12:00.544Z"
},
{
"url": "http://ljubomirgorscak.blogspot.com/feeds/posts/default",
"lastUpdate": null
},
{
"url": "http://dcucereavii.blogspot.com/feeds/posts/default?alt=rss",
"lastUpdate": "2009-11-17T00:03:00.000Z"
},
{
"url": "http://gkrilov.blogspot.com/feeds/posts/default",
"lastUpdate": "2009-09-19T04:17:33.156Z"
},
{
"url": "http://nashutzu.blogspot.com/feeds/posts/default",
"lastUpdate": null
}
]
4 changes: 2 additions & 2 deletions feeds.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# A simple list of feeds for initial testing, copied from
# A simple list of feeds for initial testing, copied from
# https://wiki.cdot.senecacollege.ca/wiki/Planet_CDOT_Feed_List
https://neilong31.wordpress.com/feed/
http://ajhooper.blogspot.com/feeds/posts/default
Expand All @@ -7,4 +7,4 @@ http://nashutzu.blogspot.com/feeds/posts/default
http://nadavid.blogspot.com/feeds/posts/default
http://gkrilov.blogspot.com/feeds/posts/default
http://KrazyDre.blogspot.com/feeds/posts/default?alt=rss
http://dcucereavii.blogspot.com/feeds/posts/default?alt=rss
http://dcucereavii.blogspot.com/feeds/posts/default?alt=rss
143 changes: 143 additions & 0 deletions src/inactive-blog-filter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/**
 * Inactive blog filter: checks whether a feed is redlisted and rebuilds the redlist
 * Criteria for filtering (in days) is read from the BLOG_INACTIVE_TIME environment variable
 */
require('./config.js');
const fs = require('fs');
const feedParser = require('./feed-parser');

/**
 * Predicate used when scanning the redlist: does this entry belong to the given feed?
 * @param {string} feedUrl - url of the feed being looked up
 * @param {Object} redItem - a single redlist entry; its `url` property is compared
 * @returns {boolean} - true when the entry's url is strictly equal to feedUrl
 */
function isRedlisted(feedUrl, redItem) {
  const { url: listedUrl } = redItem;
  return listedUrl === feedUrl;
}

/**
 * Measures how much time has passed between a post's date and the moment of the call
 * The caller compares the result against its own inactivity threshold
 * @param {Date} postDate - javascript date object of the post being checked
 * @returns {number} - elapsed time in milliseconds (negative for dates in the future,
 * NaN for an invalid date)
 */
function dateDiff(postDate) {
  return Date.now() - postDate;
}

/**
 * Callback for redlist check
 * @callback checkCallback
 * @param {?Error} err - error raised while reading or parsing the redlist, otherwise null
 * @param {boolean} result - true if the feed url is redlisted; always false when err is set
 */

/**
 * Checks if feed url is redlisted
 *
 * Reads feeds-redlist.json (relative to the current working directory) on every call.
 * An empty or whitespace-only file is treated as an empty redlist. Read failures and
 * malformed JSON are reported through the callback instead of being thrown.
 * @param {string} feedUrl - url of the feed to check against redlist
 * @param {checkCallback} callback - a callback that runs after the check
 */
function check(feedUrl, callback) {
  // Read redlist file
  fs.readFile('feeds-redlist.json', 'utf-8', (err, redListRaw) => {
    if (err) {
      // Error reading file
      callback(err, false);
      return;
    }

    if (redListRaw.trim().length === 0) {
      // File is empty (or only whitespace), so nothing is redlisted
      callback(null, false);
      return;
    }

    let redList;
    try {
      // Concat to array no matter what, so a single bare entry is handled too
      redList = [].concat(JSON.parse(redListRaw));
    } catch (parseErr) {
      // Malformed JSON must not escape this I/O callback as an uncaught exception
      callback(parseErr, false);
      return;
    }

    // The callback is invoked outside the try block so that an error thrown by the
    // caller's own callback is never mistaken for a parse failure.
    // Null entries are skipped rather than dereferenced.
    callback(
      null,
      redList.some((redItem) => redItem !== null && isRedlisted(feedUrl, redItem)),
    );
  });
}

/**
 * Builds the redlist entry for a single feed
 *
 * A feed that cannot be parsed, or that has no posts, is redlisted with a null
 * lastUpdate. A feed whose first post is older than the threshold is redlisted with
 * that post's date. This function never rejects, so one bad feed cannot block the
 * whole sweep.
 * @param {string} feedUrl - url of the feed to inspect
 * @param {number} inactiveDays - inactivity threshold in days (NaN disables the age check)
 * @returns {Promise<?Object>} - `{ url, lastUpdate }` for a redlisted feed, null otherwise
 */
async function buildRedlistEntry(feedUrl, inactiveDays) {
  const MS_PER_DAY = 1000 * 3600 * 24;

  try {
    // feedParser is handed an object of the form expected by our queue
    const feed = await feedParser({ url: feedUrl });
    const latestPost = feed === undefined || feed === null ? undefined : feed[0];

    // In case of invalid/ dead feeds
    if (latestPost === undefined || latestPost === null) {
      console.log(`Blog at: ${feedUrl} HAS NOTHING TO SHARE!`);
      return { url: feedUrl, lastUpdate: null };
    }

    // Check if the blog is inactive
    // We convert the dateDiff(result) from ms in days
    const daysSincePost = Math.ceil(dateDiff(new Date(latestPost.date)) / MS_PER_DAY);

    if (daysSincePost > inactiveDays) {
      console.log(`Blog at: ${feedUrl} is INACTIVE!`);
      return { url: feedUrl, lastUpdate: latestPost.date };
    }

    return null;
  } catch (parseErr) {
    // NOTE(review): a feed that fails to parse is treated like a dead feed; confirm
    // this is preferred over leaving the feed off the redlist on transient errors
    console.error(`unable to parse feed at: ${feedUrl}`, parseErr);
    return { url: feedUrl, lastUpdate: null };
  }
}

/**
 * Performs a separate sweep of all feeds to see which ones are inactive,
 * then updates last post date in feeds-redlist.json
 *
 * Reads feeds.txt, inspects every feed in parallel and rewrites feeds-redlist.json
 * once all of them have settled. Entries are written in the same order as feeds.txt
 * so the file is stable between runs. An empty feed list writes an empty redlist.
 * Failures are logged; nothing is returned or thrown to the caller.
 *
 * Integration with system can be improved once feed-worker is augmented to
 * pass feed data more intuitively
 *
 * Due to amount of operations, this can be run periodically instead of with every feed update
 */
function update() {
  // Read the feeds list file
  fs.readFile('feeds.txt', 'utf8', (err, lines) => {
    if (err) {
      console.error('unable to read initial list of feeds, cannot update', err.message);
      return;
    }

    // Divide the file into separate lines
    const feedUrls = lines
      .split(/\r?\n/)
      // Basic filtering to remove any lines that don't look like a feed URL
      .filter((line) => line.startsWith('http'));

    // Environment values are strings; convert once instead of on every comparison
    const inactiveDays = Number(process.env.BLOG_INACTIVE_TIME);

    // Promise.all keeps results in input order and resolves only when every feed is done
    Promise.all(feedUrls.map((feedUrl) => buildRedlistEntry(feedUrl, inactiveDays)))
      .then((entries) => {
        // Write the new feeds-redlist.json (active feeds resolved to null are dropped)
        const redlistUpdate = entries.filter((entry) => entry !== null);
        const rlData = JSON.stringify(redlistUpdate, null, 2);

        fs.writeFile('feeds-redlist.json', rlData, (werr) => {
          if (werr) {
            // Report the write error itself; the read error is always null here
            console.error('unable to write to feeds-redlist.json, cannot update', werr.message);
            return;
          }

          console.log('wrote to feeds-redlist.json');
        });
      })
      .catch((sweepErr) => {
        console.error('unable to build feeds-redlist.json, cannot update', sweepErr);
      });
  });
}

// Public API: redlist lookup (check) and periodic redlist rebuild (update)
Object.assign(exports, { check, update });

0 comments on commit 3c522cc

Please sign in to comment.