Permalink
Browse files

vendor syntax

  • Loading branch information...
1 parent 128f72d commit c8ee394cc53ad6324c518fea7b920d590c8251a7 @adamwiggins committed Nov 13, 2008
View
@@ -1,5 +1,5 @@
require File.dirname(__FILE__) + '/../vendor/maruku/maruku'
-require 'syntax/convertors/html'
+require File.dirname(__FILE__) + '/../vendor/syntax/syntax/convertors/html'
class Post < Sequel::Model
set_primary_key [ :id ]
@@ -0,0 +1,38 @@
+require 'syntax/common'
+
module Syntax

  # Fallback tokenizer used whenever no syntax-specific implementation
  # is available: it reports the entire input as one :normal token.
  class Default

    # Emit the whole of +text+ to the block as a single :normal token.
    def tokenize( text )
      yield Token.new( text, :normal )
    end

  end

  # Registry mapping syntax names to tokenizer classes. Lookups for
  # unregistered names return Default (the hash's default value).
  SYNTAX = Hash.new( Default )

  module_function

  # Instantiate a tokenizer for the named syntax. The language file is
  # required lazily; when it cannot be loaded for any reason the
  # Default handler is returned instead of raising.
  def load( syntax )
    begin
      require "syntax/lang/#{syntax}"
    rescue LoadError
      # unknown syntax: fall through to the registry, yielding Default
    end
    SYNTAX[ syntax ].new
  end

  # List the names of every supported syntax, derived from the files
  # shipped in syntax/lang/.
  def all
    pattern = File.join( File.dirname(__FILE__), "syntax", "lang", "*.rb" )
    Dir[ pattern ].map { |file| File.basename( file, ".rb" ) }
  end

end
@@ -0,0 +1,163 @@
+require 'strscan'
+
module Syntax

  # A lexeme pulled from the input by a tokenizer. A Token is the
  # matched text itself (it subclasses String), tagged with the group
  # it belongs to and an optional region instruction.
  class Token < String

    # the group this lexeme belongs to, and the region instruction
    # (:none, :region_open, or :region_close) attached to it
    attr_reader :group, :instruction

    # Build a token for +text+, tagged with +group+ and +instruction+.
    def initialize( text, group, instruction = :none )
      super( text )
      @group, @instruction = group, instruction
    end

  end

  # Base class for all tokenizers. Handles scanner setup, the main
  # tokenization loop, and the buffering that merges adjacent lexemes
  # of the same group into a single Token.
  class Tokenizer

    # Group of the token currently being accumulated.
    attr_reader :group

    # Text buffered so far for the current token.
    attr_reader :chunk

    # Reset all state ahead of a run: clear the buffer, remember the
    # callback, and wrap +text+ in a StringScanner. The #setup hook is
    # invoked last for subclass-specific initialization.
    def start( text, &block )
      @chunk = ""
      @group = :normal
      @callback = block
      @text = StringScanner.new( text )
      setup
    end

    # Hook for implementation-specific initialization; no-op here.
    def setup
    end

    # Wrap up a run: flush whatever is still buffered to the client,
    # then give the subclass a chance to clean up via #teardown.
    def finish
      start_group nil
      teardown
    end

    # Hook for implementation-specific cleanup; no-op here.
    def teardown
    end

    # One iteration of the tokenizer; must be supplied by subclasses
    # and may emit any number of tokens.
    def step
      raise NotImplementedError, "subclasses must implement #step"
    end

    # Drive the full tokenization of +text+, invoking +block+ once per
    # emitted Token.
    def tokenize( text, &block )
      start( text, &block )
      step until eos?
      finish
    end

    # Merge tokenizer-specific options into the current option set.
    def set( opts={} )
      @options ||= Hash.new
      @options.update( opts )
    end

    # Fetch a single option value (nil when it was never set).
    def option( opt )
      @options ? @options[opt] : nil
    end

    private

    # Zero-width match for an end-of-line position.
    EOL = /(?=\r\n?|\n|$)/

    # Define an instance method that forwards +sym+ to the scanner.
    def self.delegate( sym )
      define_method( sym ) { |*args| @text.__send__( sym, *args ) }
    end

    # Forward the common StringScanner queries straight through.
    [ :bol?, :eos?, :scan, :scan_until, :check, :check_until,
      :getch, :matched, :pre_match, :peek, :pos ].each do |sym|
      delegate sym
    end

    # The n-th capture group of the most recent scanner match.
    def subgroup( n )
      @text[n]
    end

    # Add +data+ to the chunk being accumulated.
    def append( data )
      @chunk << data
    end

    # Switch the active group to +gr+. Switching flushes the pending
    # chunk (emitting it as a Token) unless the group is unchanged;
    # +data+, when given, seeds the new chunk.
    def start_group( gr, data=nil )
      flush_chunk if gr != @group
      @group = gr
      append( data ) if data
    end

    # Open a region of group +gr+, emitting a :region_open token.
    def start_region( gr, data=nil )
      flush_chunk
      @group = gr
      @callback.call( Token.new( data || "", @group, :region_open ) )
    end

    # Close a region of group +gr+, emitting a :region_close token.
    def end_region( gr, data=nil )
      flush_chunk
      @group = gr
      @callback.call( Token.new( data || "", @group, :region_close ) )
    end

    # Emit the buffered chunk (if any) as a Token, then reset it.
    def flush_chunk
      @callback.call( Token.new( @chunk, @group ) ) unless @chunk.empty?
      @chunk = ""
    end

    # Hand +text+ off to the tokenizer registered for +syntax+,
    # forwarding our options and the client callback.
    def subtokenize( syntax, text )
      subtokenizer = Syntax.load( syntax )
      subtokenizer.set( @options ) if @options
      flush_chunk
      subtokenizer.tokenize( text, &@callback )
    end

  end

end
@@ -0,0 +1,27 @@
+require 'syntax'
+
module Syntax
  module Convertors

    # Base class shared by every convertor. It stores the tokenizer to
    # drive and offers a shortcut for building a convertor straight
    # from a syntax name.
    class Abstract

      # The tokenizer instance this convertor drives.
      attr_reader :tokenizer

      # Build a convertor wrapping the given tokenizer.
      def initialize( tokenizer )
        @tokenizer = tokenizer
      end

      # Shortcut: look up (and load) the tokenizer registered for
      # +syntax+ and wrap it in a new convertor instance.
      def self.for_syntax( syntax )
        new( Syntax.load( syntax ) )
      end

    end

  end
end
@@ -0,0 +1,51 @@
+require 'syntax/convertors/abstract'
+
module Syntax
  module Convertors

    # Renders tokenized text as HTML, wrapping each token group other
    # than :normal in a span whose CSS class is the group name.
    class HTML < Abstract

      # Tokenize +text+ and return the highlighted HTML. When +pre+ is
      # true (the default) the markup is wrapped in pre tags. A token
      # whose group matches the innermost open region (or :normal when
      # no region is open) is emitted without a span of its own.
      def convert( text, pre=true )
        html = ""
        html << "<pre>" if pre
        open_regions = []
        @tokenizer.tokenize( text ) do |token|
          escaped = html_escape( token )
          if token.instruction == :region_close
            open_regions.pop
            html << "</span>"
          elsif token.instruction == :region_open
            open_regions.push token.group
            html << "<span class=\"#{token.group}\">#{escaped}"
          elsif token.group == ( open_regions.last || :normal )
            html << escaped
          else
            html << "<span class=\"#{token.group}\">#{escaped}</span>"
          end
        end
        # close any regions the tokenizer left open
        html << "</span>" while open_regions.pop
        html << "</pre>" if pre
        html
      end

      private

      # Substitute the HTML metacharacters &, <, > and " with their
      # entity equivalents. Ampersand must be handled first so the
      # entities produced by later substitutions are not re-escaped.
      def html_escape( string )
        escaped = string.gsub( /&/, "&amp;" )
        escaped = escaped.gsub( /</, "&lt;" )
        escaped = escaped.gsub( />/, "&gt;" )
        escaped.gsub( /"/, "&quot;" )
      end

    end

  end
end
Oops, something went wrong.

0 comments on commit c8ee394

Please sign in to comment.