Skip to content

Commit

Permalink
Merge a59dc44 into 4751d81
Browse files Browse the repository at this point in the history
  • Loading branch information
curbengh committed Jun 24, 2020
2 parents 4751d81 + a59dc44 commit c6d51ab
Show file tree
Hide file tree
Showing 7 changed files with 1,122 additions and 135 deletions.
20 changes: 16 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ Migrate your blog from WordPress to [Hexo].

## Install

In your blog folder, add this npm dependencie to your project :
In your blog folder, add this npm dependency to your project:

``` bash
$ npm install hexo-migrator-wordpress --save
```
Expand All @@ -20,7 +20,19 @@ Export your WordPress in "Tools" → "Export" → "WordPress" in your dashboard.
Execute the following command after installation. `source` is the file path or URL of the WordPress export file.

``` bash
$ hexo migrate wordpress <source>
$ hexo migrate wordpress <source> [--options]
```

[Hexo]: http://zespia.tw/hexo
- **alias**: Populates the `alias` setting in the front-matter, for use with the [hexo-generator-alias](http://github.com/hexojs/hexo-generator-alias) module. This is useful for generating redirects.
- **limit**: Maximum number of posts to import from the feed. All posts are imported by default.
* Example:
``` bash
$ hexo migrate wordpress /path/export.xml --limit 3
```
* This doesn't apply to pages; all pages will be imported.
- **skipduplicate**: Skip posts with similar title as existing ones.
* If the input contains a post titled 'Foo Bar' and there is an existing post named 'Foo-Bar.md', then that post will not be migrated.
* The comparison is case-insensitive; a post titled 'FOO BAR' will be skipped if 'foo-bar.md' exists.
* Without this option (default), this plugin will continue to migrate that post and create a post named 'Foo-Bar-1.md'

[Hexo]: http://hexo.io/
122 changes: 2 additions & 120 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,122 +1,4 @@
'use strict';

/* global hexo */
'use strict';

const xml2js = require('xml2js');
const async = require('async');
const TurndownService = require('turndown');
const request = require('request');
const file = require('fs');

const turndownService = new TurndownService();

/**
 * Returns `str` with its first character upper-cased.
 *
 * An empty string is returned unchanged instead of throwing.
 *
 * @param {string} str - Text to capitalize.
 * @returns {string} The capitalized text.
 */
const captialize = function(str) {
  // charAt(0) yields '' for an empty string, whereas str[0] would be undefined.
  return str.charAt(0).toUpperCase() + str.substring(1);
};

/**
 * Inserts a space between every pair of consecutive opening braces,
 * turning each `{{` into `{ {`.
 *
 * @param {string} str - Text to process.
 * @returns {string} The text with every `{{` pair separated by a space.
 */
function replaceTwoBrace(str) {
  const doubleOpenBrace = /{{/g;
  return str.replace(doubleOpenBrace, '{ {');
}

/**
 * Registers the `wordpress` migrator.
 *
 * The migrator loads a WordPress export from a URL or a local file, parses it
 * with xml2js, converts each post/page body to Markdown with Turndown and
 * creates the corresponding Hexo post.
 *
 * @param {object} args - Parsed command-line arguments; `args._[0]` is the source path or URL.
 * @param {Function} callback - Invoked once the migration finishes, with an error if one occurred.
 */
hexo.extend.migrator.register('wordpress', (args, callback) => {
  const source = args._.shift();

  if (!source) {
    const help = [
      'Usage: hexo migrate wordpress <source>',
      '',
      'For more help, you can check the docs: http://hexo.io/docs/migration.html'
    ];

    console.log(help.join('\n'));
    return callback();
  }

  const log = hexo.log;
  const post = hexo.post;

  log.i('Analyzing %s...', source);

  async.waterfall([
    function(next) {
      // URL regular expression from: http://blog.mattheworiordan.com/post/13174566389/url-regular-expression-for-links-with-or-without-the
      if (source.match(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=+$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=+$,\w]+@)[A-Za-z0-9.-]+)((?:\/[+~%/.\w-_]*)?\??(?:[-+=&;%@.\w_]*)#?(?:[.!/\\w]*))?)/)) {
        request(source, (err, res, body) => {
          // Report failures through the waterfall: throwing inside this
          // callback would bypass `callback`, and ignoring a non-200
          // response would leave the waterfall hanging forever.
          if (err) return next(err);
          if (res.statusCode !== 200) {
            return next(new Error(`Failed to fetch ${source}: HTTP ${res.statusCode}`));
          }
          next(null, body);
        });
      } else {
        file.readFile(source, next);
      }
    },
    function(content, next) {
      xml2js.parseString(content, next);
    },
    function(xml, next) {
      let count = 0;

      async.each(xml.rss.channel[0].item, (item, done) => {
        if (!item['wp:post_type']) {
          return done();
        }

        const title = item.title[0].replace(/"/g, '\\"');
        const id = item['wp:post_id'][0];
        const date = item['wp:post_date'][0];
        const slug = item['wp:post_name'][0];
        let content = item['content:encoded'][0];
        const comment = item['wp:comment_status'][0];
        const status = item['wp:status'][0];
        const type = item['wp:post_type'][0];
        const categories = [];
        const tags = [];

        if (!title && !slug) return done();
        if (type !== 'post' && type !== 'page') return done();
        if (typeof content !== 'string') content = '';
        content = replaceTwoBrace(content);
        content = turndownService.turndown(content).replace(/\r\n/g, '\n');
        count++;

        if (item.category) {
          // Sort each <category> element into categories or tags by its `domain` attribute.
          item.category.forEach(category => {
            const name = category._;

            switch (category.$.domain) {
              case 'category':
                categories.push(name);
                break;

              case 'post_tag':
                tags.push(name);
                break;
            }
          });
        }

        const data = {
          title: title || slug,
          url: +id + '.html',
          id: +id,
          date: date,
          content: content,
          layout: status === 'draft' ? 'draft' : 'post'
        };

        if (type === 'page') data.layout = 'page';
        if (slug) data.slug = slug;
        if (comment === 'closed') data.comments = false;
        if (categories.length && type === 'post') data.categories = categories;
        if (tags.length && type === 'post') data.tags = tags;

        log.i('%s found: %s', captialize(type), title);
        post.create(data, done);
      }, err => {
        if (err) return next(err);

        log.i('%d posts migrated.', count);
        // Signal success so the waterfall invokes the migrator's `callback`.
        next();
      });
    }
  ], callback);
});
// Register the 'wordpress' migrator; the implementation is whatever ./lib/migrator exports.
hexo.extend.migrator.register('wordpress', require('./lib/migrator'));
44 changes: 44 additions & 0 deletions lib/feed.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
'use strict';

/* !
* Ported from feed-furious 1.0.0 to support async-ed camaro v4+
* Licensed MIT (c) 2017 Tuan Anh Tran <https://tuananh.org/>
* https://github.com/tuananh/feed-furious
*/

const { transform } = require('camaro');

// Extraction templates passed to camaro's `transform`, keyed by feed type.
// `wp` is the only feed type defined.
const template = {
wp: {
// The first array element is the path selecting the nodes ('//item'); the
// second maps output field names to paths evaluated for each selected node.
items: ['//item', {
title: 'title',
link: 'link',
// NOTE(review): the `a|b` form presumably falls back to / unions with the
// second path when the first is absent; confirm against camaro's XPath handling.
date: 'wp:post_date|pubDate',
description: 'excerpt:encoded|description',
id: 'wp:post_id',
content: 'content:encoded',
comment: 'wp:comment_status',
slug: 'wp:post_name',
status: 'wp:status',
type: 'wp:post_type',
// Selects every `category` child; NOTE(review): no filter on a `domain`
// attribute is applied here, so categories and tags appear to be merged.
tags: ['category', '.']
}]
}
};

/**
 * Determines which template applies to the given XML.
 *
 * The document is probed for `rss/channel/title`; a truthy result
 * identifies it as a `wp` feed.
 *
 * @param {string} xml - Raw XML of the feed.
 * @returns {Promise<string>} Resolves to `'wp'`.
 * @throws {Error} `invalid format` when the probe yields nothing.
 */
const detectFeedType = async xml => {
  const { wp } = await transform(xml, { wp: 'rss/channel/title' });

  if (!wp) throw new Error('invalid format');
  return 'wp';
};

/**
 * Parses a feed: detects its type, then extracts its data with the
 * template registered for that type.
 *
 * @param {string} xml - Raw XML of the feed.
 * @returns {Promise<object>} Resolves to the object produced by `transform`.
 * @throws {Error} Propagates the `invalid format` error from `detectFeedType`.
 */
const parseFeed = async xml => {
  const feedType = await detectFeedType(xml);
  const feedTemplate = template[feedType];
  return transform(xml, feedTemplate);
};

module.exports = parseFeed;
144 changes: 144 additions & 0 deletions lib/migrator.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
'use strict';

const TurndownService = require('turndown');
const got = require('got');
const { parse: parseUrl } = require('url');
const { exists, listDir, readFile } = require('hexo-fs');
const parseFeed = require('./feed');
const { slugize } = require('hexo-util');
const { join, parse } = require('path');
const { unescape } = require('querystring');

module.exports = async function(args) {
const source = args._.shift();
const { alias } = args;
let { limit } = args;
const skipduplicate = typeof args.skipduplicate !== 'undefined';
const tomd = new TurndownService();
const { config, log } = this;
const Post = this.post;
let untitledPostCounter = 0;
let errNum = 0;
let skipNum = 0;
let input, feed;
const rExcerpt = /<a id="more"><\/a>/i;
const postExcerpt = '\n<!-- more -->\n';
const posts = [];
let currentPosts = [];

try {
if (!source) {
const help = [
'Usage: hexo migrate wordpress <source> [--options]',
'',
'For more help, you can check the docs: https://github.com/hexojs/hexo-migrator-wordpress/blob/master/README.md'
];

throw help.join('\n');
}

if (/^http(s)?:\/\//i.test(source)) {
input = await got(source, { resolveBodyOnly: true, retry: 0 });
} else {
input = await readFile(source);
}

log.i('Analyzing %s...', source);

feed = await parseFeed(input);
} catch (err) {
throw new Error(err);
}

if (feed) {
if (typeof limit !== 'number' || limit > feed.items.length || limit <= 0) limit = feed.items.length;
let postLimit = 0;

for (const item of feed.items) {
if (postLimit >= limit) continue;

const { link, date, id, comment, slug, status, type, tags } = item;
let { title, content, description } = item;

const layout = status === 'draft' ? 'draft' : 'post';
content = tomd.turndown(content).replace(/\r\n/g, '\n');

if (type !== 'page') {
// Apply 'limit' option to post only
postLimit++;

if (rExcerpt.test(content)) {
content.replace(rExcerpt, postExcerpt);
} else if (description) {
description = tomd.turndown(description).replace(/\r\n/g, '\n');
content = description + postExcerpt + content;
}
}

if (!title) {
untitledPostCounter += 1;
const untitledPostTitle = 'Untitled Post - ' + untitledPostCounter;
title = untitledPostTitle;
log.w('Post found but without any titles. Using %s', untitledPostTitle);
} else {
log.i('Post found: %s', title);
}

const data = {
title,
id,
date,
content,
layout,
tags
};

if (type === 'page') data.layout = 'page';
if (slug) data.slug = slug;
if (slug && slug.includes('%')) data.slug = unescape(slug);
if (comment === 'closed') data.comments = false;
if (tags.length && type === 'post') data.tags = tags;

if (alias && link) {
data.alias = parseUrl(link).pathname;
}

posts.push(data);
}
}

if (skipduplicate) {
const postFolder = join(config.source_dir, '_posts');
const folderExist = await exists(postFolder);
const files = folderExist ? await listDir(join(config.source_dir, '_posts')) : [];
currentPosts = files.map(file => slugize(parse(file).name, { transform: 1 }));
}

if (posts.length >= 1) {
for (const post of posts) {
if (currentPosts.length && skipduplicate) {
if (currentPosts.includes(slugize(post.title, { transform: 1 }))) {
skipNum++;
continue;
}
}

try {
await Post.create(post);
} catch (err) {
log.error(err);
errNum++;
}
}

const postsNum = posts.length - errNum - skipNum;

if (untitledPostCounter) {
log.w('%d posts did not have titles and were prefixed with "Untitled Post".', untitledPostCounter);
}
if (postsNum) log.i('%d posts migrated.', postsNum);
if (errNum) log.error('%d posts failed to migrate.', posts.length);
if (skipNum) log.i('%d posts skipped.', skipNum);
}
};

19 changes: 13 additions & 6 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@
"version": "1.0.0",
"description": "WordPress migrator plugin for Hexo",
"main": "index.js",
"directories": {
"lib": "./lib"
},
"files": [
"index.js"
"index.js",
"lib/"
],
"scripts": {
"test": "mocha test/index.js",
Expand All @@ -20,17 +24,20 @@
"author": "Tommy Chen <tommy351@gmail.com> (http://zespia.tw)",
"license": "MIT",
"dependencies": {
"async": "^3.1.0",
"request": "^2.36.0",
"turndown": "^6.0.0",
"xml2js": "^0.4.19"
"camaro": "^6.0.2",
"hexo-fs": "^3.1.0",
"hexo-util": "^2.1.0",
"got": "^11.3.0",
"turndown": "^6.0.0"
},
"devDependencies": {
"chai": "^4.2.0",
"eslint": "^7.0.0",
"eslint-config-hexo": "^4.0.0",
"hexo": "^4.2.0",
"mocha": "^8.0.1",
"nyc": "^15.0.0"
"nyc": "^15.0.0",
"sinon": "^9.0.2"
},
"engines": {
"node": ">= 12.13.0"
Expand Down
Loading

0 comments on commit c6d51ab

Please sign in to comment.