Permalink
Browse files

New scripts to search for duplicate blog posts

  • Loading branch information...
davidmerfield committed Jan 9, 2019
1 parent d948e6a commit 94b253680b1a0479b32657e9bc0aabdaa564002f
Showing with 105 additions and 0 deletions.
  1. +54 −0 scripts/entry/dupes.js
  2. +32 −0 scripts/entry/getAllEntries.js
  3. +19 −0 scripts/get/blog.js
@@ -0,0 +1,54 @@
var redis = require("redis").createClient();
var get = require("../get/entry");
var async = require("async");
var getAllEntries = require("./getAllEntries");
var colors = require("colors/safe");
var moment = require('moment');
var otherEntries, sameTitle;

// Writes a labelled, numbered list of entries to stdout.
// Each line shows the entry's path, its deleted flag and its date.
// An empty list produces no output at all, not even the label.
function print(entries, label) {
  // One line per entry, prefixed with its position in the list
  function printEntry(entry, position) {
    var prefix = colors.dim("(" + position + ") ");
    var deletedFlag = colors.red("deleted:" + entry.deleted);
    var date = colors.blue(entry.date);

    console.log(prefix + entry.path, deletedFlag, date);
  }

  if (entries.length) {
    console.log("\n" + label + ":");
    entries.forEach(printEntry);
  }
}

// Entry point. Looks up the entry at the URL passed as the first
// command-line argument, loads every entry stored for the same blog,
// and lists the other entries which share its title or its slug.
get(process.argv[2], function(err, user, blog, entry) {
  if (err) throw err;

  console.log(
    colors.dim("Searching for duplicates of"),
    entry.path,
    colors.dim("(" + process.argv[2] + ")")
  );

  getAllEntries(blog.id, function(err, entries) {
    if (err) throw err;

    // Kept local to this callback so that none of the intermediate
    // results leak onto the global object.
    var otherEntries, sameTitle, sameSlug;

    // Attach a display date to each entry, formatted with blog.dateFormat
    entries.forEach(function(candidate) {
      candidate.date = moment(candidate.dateStamp).format(blog.dateFormat);
    });

    // The entry we are investigating is not a duplicate of itself
    otherEntries = entries.filter(function(otherEntry) {
      return otherEntry.guid !== entry.guid;
    });

    sameTitle = otherEntries.filter(function(otherEntry) {
      return otherEntry.title === entry.title;
    });

    sameSlug = otherEntries.filter(function(otherEntry) {
      return otherEntry.slug === entry.slug;
    });

    print(sameTitle, colors.dim("Same title '") + entry.title + colors.dim("'"));

    print(sameSlug, colors.dim("Same slug '") + entry.slug + colors.dim("'"));

    console.log("Search complete!");

    // Exit explicitly: presumably the redis client opened at the top
    // of this script would otherwise keep the process alive.
    process.exit();
  });
});
@@ -0,0 +1,32 @@
var redis = require("redis").createClient();
var async = require("async");

module.exports = function(blogID, callback) {
var args = ["0", "MATCH", "blog:" + blogID + ":entry:*", "COUNT", 1000];
var entryKeys = [];

redis.scan(args, function then(err, res) {
if (err) throw err;

// the cursor for the next pass
args[0] = res[0];

// Append the keys we matched in the last pass
entryKeys = entryKeys.concat(res[1]);

// There are more keys to check, so keep going
if (res[0] !== "0") return redis.scan(args, then);

async.map(
entryKeys,
function(entry, next) {
redis.get(entry, function(err, entry) {
if (err) return next(err);

next(null, JSON.parse(entry));
});
},
callback
);
});
};
@@ -0,0 +1,19 @@
var User = require("../../app/models/user");
var Blog = require("../../app/models/blog");
var parseUrl = require("url").parse;

// Takes a URL and fetches the blog, user and entry

module.exports = function get(url, callback) {
url = parseUrl(url);

Blog.get({ domain: url.host }, function(err, blog) {
if (err || !blog) return callback(err || new Error("No blog"));

User.getById(blog.owner, function(err, user) {
if (err || !user) return callback(err || new Error("No user"));

callback(err, user, blog);
});
});
};

0 comments on commit 94b2536

Please sign in to comment.