Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c2ce7d8
commit 6eea118
Showing
7 changed files
with
189 additions
and
99 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
23 changes: 20 additions & 3 deletions
23
app/dashboard/routes/importer/sources/wordpress/item/convert_to_markdown.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,23 @@ | ||
var Turndown = require("turndown"); | ||
var turndown = new Turndown(); | ||
var debug = require('debug')('blot:importer:wordpress:markdown'); | ||
|
||
module.exports = function(entry, callback) { | ||
|
||
entry.content = require("../../../helper").to_markdown(entry.html); | ||
|
||
debug(); | ||
debug(); | ||
debug('Input HTML:'); | ||
debug(); | ||
debug(entry.html); | ||
|
||
entry.content = turndown.turndown(entry.html); | ||
|
||
entry.content = entry.content.trim(); | ||
|
||
debug(); | ||
debug(); | ||
debug('Result:'); | ||
debug(); | ||
debug(entry.content); | ||
|
||
callback(null, entry); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
28 changes: 28 additions & 0 deletions
28
app/dashboard/routes/importer/sources/wordpress/item/remove_inline_images.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
var debug = require("debug")("blot:importer:wordpress:tidy_caption"); | ||
var cheerio = require("cheerio"); | ||
|
||
module.exports = function(html) { | ||
var $ = cheerio.load(html); | ||
|
||
debug($.html()); | ||
|
||
$("p") | ||
.filter(function() { | ||
return $(this).find("img").length; | ||
}) | ||
.each(function(i, p) { | ||
if (!$(this).text()) return; | ||
|
||
$(this) | ||
.find("a img") | ||
.each(function(i, aWithImg) { | ||
$('<p>' + $.html(aWithImg) + '</p>').insertBefore(p); | ||
$(aWithImg).remove(); | ||
}); | ||
|
||
}); | ||
|
||
debug($.html()); | ||
|
||
return $.html(); | ||
}; |
90 changes: 90 additions & 0 deletions
90
app/dashboard/routes/importer/sources/wordpress/item/tidy.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
var insert_video_embeds = require("../../../helper").insert_video_embeds; | ||
var debug = require("debug")("blot:importer:wordpress:tidy"); | ||
var remove_inline_images = require("./remove_inline_images.js"); | ||
|
||
module.exports = function(entry, callback) { | ||
var html = entry.html; | ||
|
||
html = fix_missing_p_tags(html); | ||
html = remove_caption(html); | ||
html = remove_embed(html); | ||
html = remove_inline_images(html); | ||
html = insert_video_embeds(html); | ||
|
||
entry.html = html; | ||
|
||
return callback(null, entry); | ||
}; | ||
|
||
/**
 * Strips WordPress `[caption ...]` shortcode tags, keeping their contents.
 *
 * Each opening tag (from `[caption` up to the next `]`) is removed, and
 * after each removal every `[/caption]` closing tag is dropped as well.
 *
 * @param {string} html - HTML that may contain caption shortcodes.
 * @returns {string} The HTML without the caption shortcode tags.
 */
function remove_caption(html) {
  while (html.indexOf("[caption") > -1) {
    var opening_index = html.indexOf("[caption");
    var closing_index = html.indexOf("]", opening_index);

    // An opening tag with no closing bracket cannot be removed. Stop here,
    // otherwise the string never changes and the loop would spin forever.
    if (closing_index === -1) break;

    html = html.slice(0, opening_index) + html.slice(closing_index + 1);
    html = html.split("[/caption]").join("");
  }

  return html;
}
|
||
/**
 * Strips WordPress `[embed ...]` shortcode tags, keeping their contents.
 *
 * Each opening tag (from `[embed` up to the next `]`) is removed, and
 * after each removal every `[/embed]` closing tag is dropped as well.
 *
 * @param {string} html - HTML that may contain embed shortcodes.
 * @returns {string} The HTML without the embed shortcode tags.
 */
function remove_embed(html) {
  while (html.indexOf("[embed") > -1) {
    var opening_index = html.indexOf("[embed");
    var closing_index = html.indexOf("]", opening_index);

    // An opening tag with no closing bracket cannot be removed. Stop here,
    // otherwise the string never changes and the loop would spin forever.
    if (closing_index === -1) break;

    html = html.slice(0, opening_index) + html.slice(closing_index + 1);
    html = html.split("[/embed]").join("");
  }

  return html;
}
|
||
/**
 * Wraps blank-line separated chunks in <p> tags when the HTML has none.
 *
 * The input is returned with only its line endings normalized when it
 * already contains a closing </p> tag or has no double line break.
 * Otherwise it is split on double line breaks and every chunk is wrapped
 * in <p>...</p>, re-joined with double line breaks.
 *
 * @param {string} html - HTML, possibly using \r\n or \r line endings.
 * @returns {string} HTML with normalized line endings and, where they were
 *   missing, paragraph tags.
 */
function fix_missing_p_tags(html) {
  // HTML created by Windows users contains \r\n (or a lone \r) instead of
  // \n. Collapse \r\n first: turning every \r into \n on its own would make
  // each Windows line break look like a double line break, i.e. a
  // paragraph boundary.
  html = html
    .split("\r\n")
    .join("\n")
    .split("\r")
    .join("\n");

  // Check for the closing tag instead of the
  // opening tag to avoid matching <p> and <p id="..."> etc...
  var has_p_tag = html.indexOf("</p>") > -1;
  var doesnt_have_double_line_break = html.indexOf("\n\n") === -1;

  if (has_p_tag || doesnt_have_double_line_break) {
    if (has_p_tag)
      debug(
        "Not interserting missing <p> tags into HTML because it already has p tags"
      );
    if (doesnt_have_double_line_break)
      debug(
        "Not interserting missing <p> tags into HTML because it does not have double line breaks"
      );

    debug(JSON.stringify(html));

    return html;
  }

  return html
    .split("\n\n")
    .map(function(line) {
      return "<p>" + line + "</p>";
    })
    .join("\n\n");
}
51 changes: 0 additions & 51 deletions
51
app/dashboard/routes/importer/sources/wordpress/item/tidy_HTML.js
This file was deleted.
Oops, something went wrong.