Permalink
Browse files

Initial stab at creating a MediaWiki to X converter, focusing on Markdown first.
  • Loading branch information...
0 parents commit a962bb326aaedea5380cec71afcbef612c5173df @headius committed Nov 23, 2010
Showing with 310 additions and 0 deletions.
  1. +14 −0 README.txt
  2. +15 −0 bin/mediakiller
  3. +19 −0 examples/grab_bag.rb
  4. +183 −0 lib/mediakiller.rb
  5. +79 −0 spec/conversion_spec.rb
@@ -0,0 +1,14 @@
+This is a very simple converter from MediaWiki markup to other formats
+(currently only Markdown is supported).
+
+Dependencies: mediacloth (gem)
+
+Usage:
+
+As an API:
+
+MediaKiller.new(:markdown).convert(some_mediawiki_text)
+
+At the command line:
+
+mediakiller markdown some_mediawiki_file
@@ -0,0 +1,15 @@
+#!/usr/bin/jruby
+
+begin
+ require 'mediakiller'
+rescue LoadError
+ $: << File.join(File.dirname(File.dirname(__FILE__)), "lib")
+ require 'mediakiller'
+end
+
+if ARGV.size != 2
+ STDERR.puts "usage: mediakiller <format> <file>"
+ exit 1
+end
+
+puts MediaKiller.new(ARGV[0].intern).convert(File.read(ARGV[1]))
@@ -0,0 +1,19 @@
+require 'mediakiller'
+
+wikitext = <<WIKI
+==header1==
+===header2===
+* list
+* list2
+
+# foo
+# foo2
+
+This is a link: [[asdf]]
+This is one inline [[qwer|Qwerty]] in a sentence.
+
+This text is '''bold'''.
+This text is ''italic''.
+WIKI
+
+puts MediaKiller.new(:markdown).convert(wikitext)
@@ -0,0 +1,183 @@
+begin
+ require 'mediacloth'
+rescue LoadError
+ require 'rubygems'
+ require 'mediacloth'
+end
+
+class MediaKiller
+ def initialize(target)
+ case target
+ when :markdown
+ # ok
+ else
+ raise "unsupported target format: #{target}"
+ end
+
+ @generator_class = TARGETS[target]
+
+ # default link mapper just passes through
+ @link_mapper = lambda {|text, link| return text, link}
+ end
+
+ attr_accessor :link_mapper
+
+ def convert(content)
+ @parser = MediaWikiParser.new
+ @parser.lexer = MediaWikiLexer.new
+ ast = @parser.parse(content)
+
+ generator = @generator_class.new
+ generator.link_mapper = @link_mapper
+ # this chomping is gross, but the mediacloth AST doesn't
+ # reflect structure well, so we toss \n around a lot
+ generator.parse(ast).chomp.chomp
+ end
+
+ class MediaWikiMarkdownGenerator
+ attr_accessor :link_mapper
+
+ def parse(ast)
+ case ast
+ when InternalLinkAST
+ parse_internal_link(ast)
+ when InternalLinkItemAST
+ parse_internal_link_item(ast)
+ when LinkAST
+ parse_link(ast)
+ when ListAST
+ parse_list(ast)
+ when ListItemAST
+ parse_list_item(ast)
+ when ParagraphAST
+ parse_paragraph(ast)
+ when PreformattedAST
+ parse_preformatted(ast)
+ when ResourceLinkAST
+ parse_resource_link(ast)
+ when SectionAST
+ parse_section(ast)
+ when TextAST
+ parse_text(ast)
+ when WikiAST
+ parse_wiki(ast)
+
+ # this comes last, because it's a superclass of several others
+ when FormattedAST
+ parse_formatted(ast)
+ else
+ raise "unknown AST element: #{ast}"
+ end
+ end
+
+ def parse_formatted(ast)
+ case ast.formatting
+ when :Bold
+ "**#{parse_wiki(ast)}**"
+ when :Italic
+ "*#{parse_wiki(ast)}*"
+ else
+ raise "unsupported formatting: #{ast.formatting} in #{ast}"
+ end
+ end
+
+ def parse_internal_link(ast)
+ # is there ever more than one child?
+ link = ast.locator
+ text = ast.children.empty? ? link : parse(ast.children.first)
+ mapped_text, mapped_link =
+ link_mapper.call(text, link)
+ "[#{mapped_text}](#{mapped_link})"
+ end
+
+ def parse_internal_link_item(ast)
+ # do these do anything other than aggregate a child?
+ parse(ast.children.first)
+ end
+
+ def parse_link(ast)
+ # is there ever more than one child?
+ link = ast.url
+ text = ast.children.empty? ? link : parse(ast.children.first)
+ mapped_text, mapped_link =
+ link_mapper.call(text, link)
+ "[#{mapped_text}](#{mapped_link})"
+ end
+
+ def parse_list(ast)
+ case ast.list_type
+ when :Bulleted
+ ast.children.map do |child|
+ "* #{parse(child)}"
+ end.join + "\n"
+ when :Numbered
+ index = 0
+ ast.children.map do |child|
+ index+=1
+ "#{index}. #{parse(child)}"
+ end.join + "\n"
+ else
+ raise "unsupported list format: #{ast.list_type} in #{ast}"
+ end
+ end
+
+ def parse_list_item(ast)
+ # do list items do anything but aggregate a child?
+ parse(ast.children.first)
+ end
+
+ def parse_paragraph(ast)
+ parse_wiki(ast) + "\n\n"
+ end
+
+ def parse_preformatted(ast)
+ '> ' + ast.contents
+ end
+
+ def parse_resource_link(ast)
+ case ast.prefix
+ when "Image"
+ # size not supported
+ link = ast.locator
+ if ast.children.empty?
+ text = link
+ else
+ case ast.children.size
+ when 1
+ text = parse(ast.children.first)
+ when 2
+ warn "image size is not supported"
+ text = parse(ast.children[1])
+ else
+ raise "unknown size of image resource link children: #{ast.inspect}"
+ end
+ end
+ mapped_text, mapped_link = link_mapper.call(text, link)
+ "![#{mapped_text}](#{mapped_link})"
+ else
+ raise "unsupported resource link: #{ast.inspect}"
+ end
+ end
+
+ def parse_section(ast)
+ content = ast.children.map {|child| parse(child)}.join
+ length = content.length
+ # prepend appropriate number of hash signs plus a space
+ "#{'#' * (ast.level - 1)} #{content}\n"
+ end
+
+ def parse_text(ast)
+ ast.contents
+ end
+
+ def parse_wiki(ast)
+ ast.children.map do |child|
+ parse(child)
+ end.join
+ end
+ end
+
+ TARGETS = {
+ :markdown => MediaWikiMarkdownGenerator
+ }
+end
@@ -0,0 +1,79 @@
+$: << File.join(File.dirname(File.dirname(__FILE__)), 'lib')
+require 'mediakiller'
+
+describe "MediaWiki markup" do
+ before :each do
+ @killer = MediaKiller.new(:markdown)
+ end
+
+ describe "headers" do
+ it "becomes '# ' headers of appropriate depth" do
+ @killer.convert("==header==").should == "# header"
+ @killer.convert("===header===").should == "## header"
+ @killer.convert("====header====").should == "### header"
+ @killer.convert("=====header=====").should == "#### header"
+ @killer.convert("======header======").should == "##### header"
+ @killer.convert("=======header=======").should == "###### header"
+ end
+ end
+
+ describe "preformatted text" do
+ it "becomes '> ' prefixed text" do
+ @killer.convert(" line1\n line2").should == "> line1\n> line2"
+ end
+ end
+
+ describe "italic text" do
+ it "becomes emphasized text" do
+ @killer.convert("''text''").should == '*text*'
+ end
+ end
+
+ describe "bold text" do
+ it "becomes strong text" do
+ @killer.convert("'''text'''").should == '**text**'
+ end
+ end
+
+ describe "unordered bulleted lists" do
+ it "remain the same" do
+ @killer.convert("* one\n* two").should == "* one\n* two"
+ end
+ end
+
+ describe "ordered bulleted lists" do
+ it "become numbered lists" do
+ @killer.convert("# one\n# two").should == "1. one\n2. two"
+ end
+ end
+
+ describe "internal links" do
+ it "become []() links based on provided mapper" do
+ @killer.convert("[[Foo|Bar]]").should == "[Bar](Foo)"
+ end
+ end
+
+ describe "external links" do
+ it "becomes []() links based on provided mapper" do
+ @killer.convert("http://blog.headius.com").should ==
+ "[http://blog.headius.com](http://blog.headius.com)"
+ @killer.convert("[http://blog.headius.com blog]").should ==
+ "[blog](http://blog.headius.com)"
+ end
+ end
+
+ describe "inline images" do
+ it "becomes ![]() images based on provided mapper" do
+ @killer.convert("[[Image:something.png|50px|alt text]]").should ==
+ "![alt text](something.png)"
+ end
+ end
+
+ describe "code sections" do
+ it "becomes backticked sections" do
+ pending "mediacloth does not treat <code> separately" do
+ @killer.convert('<code>blah</code>').should == '`blah`'
+ end
+ end
+ end
+end

0 comments on commit a962bb3

Please sign in to comment.