Skip to content
Permalink
Browse files

Added weebly importer

  • Loading branch information...
davidmerfield committed Mar 11, 2019
1 parent 9217934 commit 552467d582f5afd5c44fdc231d7d18b305a3e099
@@ -0,0 +1,69 @@
var entries = require("./entries.json");
var cheerio = require("cheerio");
var fs = require("fs-extra");
var async = require("async");
var Turndown = require("turndown");
var turndown = new Turndown();
// Base URL of the site the entries were scraped from, taken from the first
// command-line argument. It is prepended to root-relative ("/path") asset URLs.
var domain = process.argv[2];

// Root folder that receives one sub-folder per entry.
var output = __dirname + "/data";

// Resolved once here instead of on every loop iteration.
var pathModule = require("path");
var urlModule = require("url");

// Base URL without trailing slashes, so joining it with "/path" never
// produces a double slash.
var baseUrl = domain ? domain.replace(/\/+$/, "") : "";

// Scheme applied to protocol-relative ("//host/path") URLs: the scheme of the
// base URL when it is http(s), otherwise https.
var baseProtocol = "https:";

if (baseUrl) {
  var parsedProtocol = urlModule.parse(baseUrl).protocol;

  if (parsedProtocol === "http:" || parsedProtocol === "https:") {
    baseProtocol = parsedProtocol;
  }
} else {
  console.log(
    "No domain argument given; root-relative URLs will be left untouched"
  );
}

/**
 * Turns the value of a `src` attribute into a URL that can be downloaded.
 *
 * @param {string} src - Raw attribute value.
 * @returns {string|null} The URL to download, or null when the value is
 *   empty, is an inline `data:` URI, or is root-relative while no domain
 *   was supplied.
 */
function resolveAssetUrl(src) {
  var value = (src || "").trim();

  if (!value || /^data:/i.test(value)) return null;

  // Must be checked before the root-relative case: "//host/x" also starts
  // with "/" but names another host, not a path on `domain`.
  if (value.slice(0, 2) === "//") return baseProtocol + value;

  if (value[0] === "/") return baseUrl ? baseUrl + value : null;

  return value;
}

/**
 * Derives the local file name for a downloaded asset: an underscore followed
 * by the last path segment of the URL.
 *
 * @param {string} url - URL of the asset.
 * @returns {string} File name, "_" alone when the URL has no path segment.
 */
function assetFileName(url) {
  return "_" + pathModule.basename(urlModule.parse(url).pathname || "");
}

/**
 * Maps an entry title to a single folder name by replacing path separators,
 * so a title such as "a/b" does not create nested folders.
 *
 * @param {string} title - Entry title.
 * @returns {string} Folder name.
 */
function entryFolderName(title) {
  return String(title).replace(/[\/\\]/g, "-");
}

// Process the entries one at a time: download every asset referenced through
// a `src` attribute, point the attribute at the local copy, then write the
// entry as Markdown to <output>/<title>/post.txt.
async.eachSeries(
  entries,
  function(entry, next) {
    var $ = cheerio.load(entry.html);
    var assets = [];
    var outputFolder = output + "/" + entryFolderName(entry.title);

    $("[src]").each(function() {
      var url = resolveAssetUrl($(this).attr("src"));

      // Nothing to download; the attribute is left as it is.
      if (!url) return;

      assets.push({
        url: url,
        node: this,
        name: assetFileName(url)
      });
    });

    async.eachSeries(
      assets,
      function(item, next) {
        var path = outputFolder + "/" + item.name;

        // Make sure the target folder exists and that no stale copy from a
        // previous run survives a failed download.
        fs.ensureDirSync(outputFolder);
        fs.removeSync(path);

        download(item.url, path, function(err) {
          // Best effort: a failed download is reported and the element keeps
          // its original remote URL.
          if (err) {
            console.log("Error downloading", item.url, err);
            return next();
          }

          $(item.node).attr("src", item.name);
          next();
        });
      },
      function(err) {
        if (err) return next(err);

        entry.html = $.html();
        entry.content = turndown.turndown(entry.html).trim();

        // Prepend the date metadata line and the title heading.
        entry.content =
          "Date: " +
          entry.date +
          "\n\n# " +
          entry.title +
          "\n\n" +
          entry.content;

        fs.outputFileSync(outputFolder + "/post.txt", entry.content);
        console.log("Wrote:", outputFolder + "/post.txt");
        next();
      }
    );
  },
  function(err) {
    if (err) throw err;

    console.log("Done!");
    process.exit();
  }
);

/**
 * Downloads a URL to a local file with curl, following redirects.
 *
 * curl is started directly with an argument array rather than through a
 * shell, so quotes, `$`, backticks, `&` or `;` in a scraped URL or in the
 * target path cannot break or inject into the command.
 *
 * @param {string} uri - URL to fetch.
 * @param {string} filename - Path of the file to write.
 * @param {function(Error|null, string, string)} callback - Called with an
 *   error when the arguments are invalid, curl cannot be started or curl
 *   exits with a non-zero status; otherwise with curl's stdout and stderr.
 */
function download(uri, filename, callback) {
  console.log("Downloading", uri, "to", filename);

  if (
    typeof uri !== "string" ||
    uri === "" ||
    typeof filename !== "string" ||
    filename === ""
  ) {
    // Report asynchronously so the callback timing matches the normal path.
    return process.nextTick(function() {
      callback(
        new TypeError("download expects a non-empty uri and filename")
      );
    });
  }

  require("child_process").execFile(
    "curl",
    // --fail makes HTTP error responses (404 etc.) an error instead of
    // saving the error page as if it were the requested file; "--" ends
    // option parsing so the URL is never read as a flag.
    ["-L", "--fail", "-o", filename, "--", uri],
    callback
  );
}
@@ -0,0 +1,67 @@
var cheerio = require("cheerio");
var fs = require("fs-extra");
var async = require("async");

// URL of the blog's archive page, taken from the first command-line argument.
var archives = process.argv[2];

// Fail with a usage hint instead of an opaque TypeError from url.parse (no
// argument) or a bogus "https://null" base (URL given without a scheme).
if (!archives || !require("url").parse(archives).hostname) {
  console.error(
    "Usage: pass the absolute URL of the archive page (including http:// or https://) as the first argument"
  );
  process.exit(1);
}

// Base used to resolve root-relative links found on the archive page. It is
// always https, whatever scheme the archive URL itself uses.
var domain = "https://" + require("url").parse(archives).hostname;

/**
 * Fetches a URL with curl, following redirects, and passes the response body
 * to the callback.
 *
 * curl is started directly with an argument array rather than through a
 * shell, so characters such as `&`, `;`, `?` or spaces in the URL cannot
 * break or inject into the command.
 *
 * @param {string} url - URL to fetch.
 * @param {function(Error|null, string, string)} cb - Called with an error
 *   when the URL is invalid, curl cannot be started or curl exits with a
 *   non-zero status; otherwise with curl's stdout (the body) and stderr.
 */
function curl(url, cb) {
  if (typeof url !== "string" || url === "") {
    // Report asynchronously so the callback timing matches the normal path.
    return process.nextTick(function() {
      cb(new TypeError("curl expects a non-empty URL string"));
    });
  }

  require("child_process").execFile(
    "curl",
    // "--" ends option parsing so the URL is never read as a flag.
    ["-L", "--", url],
    // The default output buffer is too small for larger pages and would
    // make the call fail once exceeded.
    { maxBuffer: 50 * 1024 * 1024 },
    cb
  );
}

// Entries found on the archive page: { title, source }, later extended with
// { html, date } once each entry page has been fetched.
var entries = [];

// Fetch the archive page, collect the links to the individual posts, fetch
// each post in series and write the collected data to entries.json.
curl(archives, function(err, res) {
  if (err) throw err;

  var $ = cheerio.load(res);

  $("#wsite-content ul li a").each(function() {
    var href = $(this).attr("href");
    var source;

    // Anchors without an href have nothing to fetch.
    if (!href) return;

    if (href.slice(0, 2) === "//") {
      // Protocol-relative link: it names its own host, so only a scheme is
      // missing. Checked before the root-relative case, which it would
      // otherwise match.
      source = "https:" + href;
    } else if (href[0] === "/") {
      source = domain + href;
    } else {
      source = href;
    }

    entries.push({
      title: $(this)
        .text()
        .trim(),
      source: source
    });
  });

  console.log("Downloading " + entries.length + " entries");

  async.mapSeries(
    entries,
    function(entry, next) {
      console.log(".", entry.title, entry.source);

      curl(entry.source, function(err, res) {
        if (err) return next(err);

        var $ = cheerio.load(res);
        var content = $(".blog-post .blog-content").html();

        // .html() yields null when nothing matched; report which page
        // lacked the post body instead of failing on null.trim().
        if (content === null || content === undefined) {
          return next(
            new Error(
              "No .blog-post .blog-content element found at " + entry.source
            )
          );
        }

        entry.html = content.trim();

        entry.date = $(".blog-date .date-text")
          .text()
          .trim();

        return next(null, entry);
      });
    },
    function(err, results) {
      if (err) throw err;

      // Written relative to the current working directory.
      fs.outputJson("entries.json", results, function(err) {
        if (err) throw err;

        console.log("Done!");
        process.exit();
      });
    }
  );
});
Oops, something went wrong.

0 comments on commit 552467d

Please sign in to comment.
You can’t perform that action at this time.