/
parse-wxr.php
69 lines (60 loc) · 1.68 KB
/
parse-wxr.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
<?php
// Generated Markdown files are written relative to the parent of this script's directory.
$root = dirname( __DIR__ );

// The export is expected to sit next to this script as import.xml.
$import_file = __DIR__ . '/import.xml';
$reader = simplexml_load_file( $import_file );

// simplexml_load_file() returns false when the file is missing or is not
// well-formed XML. Compare strictly: a valid but empty element can also
// evaluate as falsy, and that case must not be treated as a load failure.
if ( false === $reader ) {
	fwrite( STDERR, sprintf( "Unable to load or parse %s\n", $import_file ) );
	exit( 1 );
}
// HTML is valid Markdown, but we should replace for comfort's sake.
//
// This map is applied with str_replace(), which processes the pairs in the
// order they are listed here. Two consequences of that ordering matter:
// - Tag pairs come before entity pairs, so an escaped tag such as
//   `&lt;h2&gt;` decodes to the literal text `<h2>` instead of being turned
//   into a Markdown heading.
// - `&amp;` is decoded last, so a double-escaped `&amp;lt;` becomes `&lt;`
//   rather than being decoded twice into `<`.
$markdown_map = [
	// Headings.
	'<h2>'      => '## ',
	'</h2>'     => '',
	'<h3>'      => '### ',
	'</h3>'     => '',

	// [php] shortcode blocks become fenced code blocks.
	'[php]'     => '```php',
	'[/php]'    => '```',

	// Inline formatting.
	'<code>'    => '`',
	'</code>'   => '`',
	'<em>'      => '*',
	'</em>'     => '*',
	'<strong>'  => '**',
	'</strong>' => '**',

	// Typographic quotes, both as literal characters and as numeric entities.
	'“'         => '"',
	'”'         => '"',
	'’'         => "'",
	'&#8220;'   => '"',
	'&#8221;'   => '"',
	'&#8217;'   => "'",

	// Basic HTML entities (`&amp;` must stay last, see above).
	'&lt;'      => '<',
	'&gt;'      => '>',
	'&quot;'    => '"',
	'&amp;'     => '&',
];
// Convert every published item of the export into a Markdown file below $root.
// The file path mirrors the part of the item's link that follows $base_url.
$base_url = 'https://developer.wordpress.org/rest-api/';

foreach ( $reader->channel->item as $item ) {
	$title = (string) $item->title;
	$url = (string) $item->link;
	$content = (string) $item->children( 'http://purl.org/rss/1.0/modules/content/' )->encoded;
	$wxr_item = $item->children( 'http://wordpress.org/export/1.2/' );
	$status = (string) $wxr_item->status;

	// Only items whose status is exactly "publish" are exported.
	if ( 'publish' !== $status ) {
		printf( "Skipping draft %s\n", $title );
		continue;
	}

	// The slug is whatever follows the base URL. Require the base to be a real
	// prefix: otherwise the whole URL would end up inside the output path.
	if ( 0 !== strpos( $url, $base_url ) ) {
		fwrite( STDERR, sprintf( "Skipping %s: unexpected URL %s\n", $title, $url ) );
		continue;
	}

	// Cast because substr() returns false instead of '' on older PHP versions
	// when the URL is exactly the base URL.
	$slug = rtrim( (string) substr( $url, strlen( $base_url ) ), '/' );

	// Compare against '' rather than using empty(), which would also treat a
	// slug of "0" as missing. The base URL itself maps to index.md.
	if ( '' === $slug ) {
		$slug = 'index';
	}

	$path = $root . '/' . $slug . '.md';
	$dir = dirname( $path );

	// The second is_dir() check covers the directory appearing between the
	// first check and the mkdir() call.
	if ( ! is_dir( $dir ) && ! mkdir( $dir, 0755, true ) && ! is_dir( $dir ) ) {
		fwrite( STDERR, sprintf( "Could not create directory %s\n", $dir ) );
		continue;
	}

	printf( "Creating %s: %s\n", $title, $path );

	// Literal tag/shortcode/entity replacements first...
	$markdowned = str_replace(
		array_keys( $markdown_map ),
		array_values( $markdown_map ),
		$content
	);

	// ...then simple anchors (`<a href="...">text</a>`) become Markdown links.
	$linked = preg_replace(
		'#<a href="([^"]+)">([^<]+)</a>#i',
		'[\2](\1)',
		$markdowned
	);

	// preg_replace() returns null on a PCRE error; keep the unlinked text in
	// that case instead of writing an empty page body.
	if ( null !== $linked ) {
		$markdowned = $linked;
	}

	$data = sprintf( "# %s\n\n%s\n", $title, $markdowned );

	if ( false === file_put_contents( $path, $data ) ) {
		fwrite( STDERR, sprintf( "Could not write %s\n", $path ) );
	}
}