From 581a618b8ba91b73b361bf49cac0c3dec090d5c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20A=C3=9Fmann?= Date: Wed, 28 Sep 2011 23:07:48 +0200 Subject: [PATCH] Refactored core. * added support for threaded processing * switched to bundler gem building * added documentation --- .gitignore | 25 ++------- Gemfile | 6 +++ Gemfile.lock | 24 +++++++++ LICENSE | 2 +- README.markdown | 67 ++++++++++++++--------- Rakefile | 54 +------------------ VERSION | 1 - lib/rack-esi.rb | 108 ++++++++++---------------------------- lib/rack-esi/processor.rb | 53 +++++++++++++++++++ lib/rack-esi/threaded.rb | 50 ++++++++++++++++++ lib/rack-esi/version.rb | 7 +++ rack-esi.gemspec | 28 ++++++++++ test/_test.rb | 31 +++++++++++ test/rack-esi_test.rb | 6 +-- test/teststrap.rb | 10 ++-- 15 files changed, 286 insertions(+), 186 deletions(-) create mode 100644 Gemfile create mode 100644 Gemfile.lock delete mode 100644 VERSION create mode 100644 lib/rack-esi/processor.rb create mode 100644 lib/rack-esi/threaded.rb create mode 100644 lib/rack-esi/version.rb create mode 100644 rack-esi.gemspec create mode 100644 test/_test.rb diff --git a/.gitignore b/.gitignore index c1e0daf..4040c6c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,21 +1,4 @@ -## MAC OS -.DS_Store - -## TEXTMATE -*.tmproj -tmtags - -## EMACS -*~ -\#* -.\#* - -## VIM -*.swp - -## PROJECT::GENERAL -coverage -rdoc -pkg - -## PROJECT::SPECIFIC +*.gem +.bundle +Gemfile.lock +pkg/* diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..9a55caa --- /dev/null +++ b/Gemfile @@ -0,0 +1,6 @@ +source "http://rubygems.org" +gemspec + +gem "rack" +gem "nokogiri" +# gem "patron" diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..ac93e85 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,24 @@ +PATH + remote: . 
+ specs: + rack-esi (0.2.0) + nokogiri + rack + +GEM + remote: http://rubygems.org/ + specs: + nokogiri (1.5.0) + rack (1.3.3) + riot (0.12.5) + rr + rr (1.0.4) + +PLATFORMS + ruby + +DEPENDENCIES + nokogiri + rack + rack-esi! + riot diff --git a/LICENSE b/LICENSE index ff69134..0137d11 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 Florian Assmann +Copyright (c) 2009 Florian Aßmann Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/README.markdown b/README.markdown index cae1679..ccaa7e4 100644 --- a/README.markdown +++ b/README.markdown @@ -1,49 +1,66 @@ # rack-esi -Nokogiri based ESI middleware implementation for Rack with (limited) support -for include, remove and comment. +Rack-ESI is a Nokogiri based ESI middleware implementation for Rack with support for include tags; all other ESI-namespaced nodes are simply removed. + +To make this gem work you must define the [xmlns:esi](http://www.edge-delivery.org/esi/1.0) namespace in your text/html response. + +Note: This gem should only be used in development. For production use, set up Varnish or any other ESI-enabled server. 
## Features - * path blacklisting (:skip => nil, expects Regexp) - * type whitelisting (:only => /^text\/(?:x|ht)ml/) - * recursion limit (:depth => 5) - * include limits (:includes => 32) - * support for <include> alt and noerror attributes + * threaded (in case we have slow IOs) + * PATH_INFO blacklisting (:skip => nil, should respond to ===) + * support for esi|include[alt] and esi|include[onerror] fallbacks + +## Dependencies + + * Nokogiri + * Rack + +## Setup + +### w/o Gemfile -_It's for development purpose..._ + $ gem install rack-esi -## Installation +### w/ Gemfile - gem install rack-esi + gem 'rack-esi' -## Rails Setup (environment.rb) +### rackup - config.gem 'rack-esi' - require 'rack-esi' - config.middleware.insert_before config.middleware.first, Rack::ESI + use Rack::ESI, options || {} + run Application.new + +### Rails: environment.rb + + config.gem 'rack-esi' # for setups w/o Gemfile + config.middleware.use Rack::ESI, options || {} + +## Options + + * poolsize: 4 + Number of worker threads. A value of 1 disables the threading model. + * skip: nil + This should be an object which responds to #===(PATH_INFO). + * parser: Nokogiri::XML::Document + You can change this to Nokogiri::HTML::Document, but you should change the serializer, too (see below). + * serializer: :to_xhtml + The serializer value specifies the name of the method which is sent to the object created by parser#parse. ## TODO * write documentation * write more tests * support more ESI elements - * switch to Nokogiri::XML::SAX::Document? - -## Dependencies - - * Nokogiri - * Rack ## Note on Patches/Pull Requests * Fork the project. * Make your feature addition or bug fix. - * Add tests for it. This is important so I don't break it in a - future version unintentionally. + * Add tests for it. * Commit, do not mess with rakefile, version, or history. - (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull) - * Send me a pull request. 
Bonus points for topic branches. + * Send me a pull request. ## Thanks @@ -51,4 +68,4 @@ tenderlove and Qerub ## Copyright -Copyright (c) 2009 Florian Assmann. See LICENSE for details. +Copyright (c) 2009 Florian Aßmann. See LICENSE for details. diff --git a/Rakefile b/Rakefile index 541631f..2995527 100644 --- a/Rakefile +++ b/Rakefile @@ -1,53 +1 @@ -require 'rubygems' -require 'rake' - -begin - require 'jeweler' - Jeweler::Tasks.new do |gem| - gem.name = "rack-esi" - gem.summary = %Q{ESI middleware implementation for Rack.} - gem.description = %Q{Nokogiri based ESI middleware implementation for Rack with (limited) support for include, remove and comment.} - gem.email = "florian.assmann@email.de" - gem.homepage = "http://github.com/boof/rack-esi" - gem.authors = ["Florian Aßmann"] - gem.add_development_dependency "riot", ">= 0" - gem.add_development_dependency "yard", ">= 0" - gem.add_dependency 'nokogiri', '>= 0' - end - Jeweler::GemcutterTasks.new -rescue LoadError - puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler" -end - -require 'rake/testtask' -Rake::TestTask.new(:test) do |test| - test.libs << 'lib' << 'test' - test.pattern = 'test/**/*_test.rb' - test.verbose = true -end - -begin - require 'rcov/rcovtask' - Rcov::RcovTask.new do |test| - test.libs << 'test' - test.pattern = 'test/**/*_test.rb' - test.verbose = true - end -rescue LoadError - task :rcov do - abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov" - end -end - -task :test => :check_dependencies - -task :default => :test - -begin - require 'yard' - YARD::Rake::YardocTask.new -rescue LoadError - task :yardoc do - abort "YARD is not available. 
In order to run yardoc, you must: sudo gem install yard" - end -end +require "bundler/gem_tasks" diff --git a/VERSION b/VERSION deleted file mode 100644 index d917d3e..0000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.1.2 diff --git a/lib/rack-esi.rb b/lib/rack-esi.rb index c66abb2..e3280af 100644 --- a/lib/rack-esi.rb +++ b/lib/rack-esi.rb @@ -1,97 +1,47 @@ -require 'rack' -require 'nokogiri' +require 'bundler' +Bundler.require -class Rack::ESI - NS = { 'esi' => 'http://www.edge-delivery.org/esi/1.0' } - METHODS = { 'include' => :esi_include, 'remove' => nil, 'comment' => nil } - CSS = METHODS.keys.map { |cmd| "esi|#{ cmd }" } * ',' +require File.expand_path('../rack-esi/processor', __FILE__) - class Error < RuntimeError - def initialize(status, headers, response) - @status, @headers, @response = status, headers, response - end - def finish - return [@status, @headers, backtrace] - end - end +class Rack::ESI def initialize(app, options = {}) - @app = app - - @paths = options[:skip] - @types = options[:only] || /^text\/(?:x|ht)ml/ - @max_includes = options[:includes] || 32 - @max_recursion = options[:depth] || 5 + @parser = options.fetch :parser, Nokogiri::XML::Document + @serializer = options.fetch :serializer, :to_xhtml + @skip = options[:skip] + @poolsize = options.fetch :poolsize, 4 + @processor = @poolsize == 1 ? Processor::Linear : Processor::Threaded + + super app, options end - def call env, counter = { :recursion => 0, :includes => 0 } - return @app.call(env) if skip_path? env['PATH_INFO'] + def queue(&block) + unless @queue + @queue, @group = Queue.new, ThreadGroup.new + @poolsize.times { @group.add Worker.new(@queue) } - status, headers, input = @app.call env.dup - return status, headers, input if skip_type? 
headers['Content-Type'] - - output = [] - input.each { |body| output << compile_body(body, env, counter) } - - Rack::Response.new(output, status, headers).finish - end - - private - - def with_compiled_path(env, path) - # TODO: should compile variables. - env.merge 'PATH_INFO' => path, 'REQUEST_URI' => path + at_exit { Finisher.wait @queue } end - def fetch(path, env, counter) - call with_compiled_path(env, path), counter if path - rescue => e - return [500, {}, e.backtrace] - end + @queue.push block + end - # Should I use XML::SAX::Parser? - def compile_body(body, env, counter) - document = Nokogiri.XML body + def build_processor(env) + @processor.new self, env + end - document.css(CSS, NS).each do |node| - method = METHODS[node.name] and send method, node, env, counter - node.unlink - end + attr_reader :parser, :serializer - document.to_xhtml - end + def call(env) + return app.call(env) if @skip === env['PATH_INFO'] - def skip_path?(path) - @paths =~ path if @paths - end - def skip_type?(type) - @types !~ type - end + status, headers, body = app.call env.dup - def max?(counter) - not counter[:includes] < @max_includes && - counter[:recursion] < @max_recursion + if status == 200 and headers['Content-Type'] =~ /text\/html/ + body = build_processor(env).process body end - def esi_include(node, env, counter) - return if max? 
counter - - counter[:includes] += 1 - counter[:recursion] += 1 - - status, headers, response = fetch node['src'], env, counter - status, headers, response = fetch node['alt'], env, counter if status != 200 - - if status == 200 - data = '' - response.each { |inc| data << inc } - node.before data - elsif node['onerror'] != 'continue' - raise Error.new(status, headers, response) - end - - ensure - counter[:recursion] -= 1 - end + return status, headers, body + end end diff --git a/lib/rack-esi/processor.rb b/lib/rack-esi/processor.rb new file mode 100644 index 0000000..661dafe --- /dev/null +++ b/lib/rack-esi/processor.rb @@ -0,0 +1,53 @@ +class Rack::ESI + class Processor < Struct.new(:esi, :env) + + class Linear < self + def process_document(d) + d.xpath('//e:*', 'e' => NAMESPACE).each { |n| process_node n } + end + end + autoload :Threaded, File.expand_path('../threaded', __FILE__) + + NAMESPACE = 'http://www.edge-delivery.org/esi/1.0' + Error = Class.new RuntimeError + + def read(enumerable, buffer = '') + enumerable.each { |str| buffer << str } + buffer + end + + def include(path) + # RADAR patron here? + esi.call env.merge('PATH_INFO' => path, 'REQUEST_URI' => path) + rescue => e + return 500, {}, [] + end + def process_node(node) + case node.name + when 'include' + status, headers, body = include node['src'] + + unless status == 200 or node['alt'].nil? 
+ status, headers, body = include node['alt'] + end + + if status == 200 + node.replace read(body) + elsif node['onerror'] != 'continue' + raise Error + end + else + node.remove + end + end + def process_document(document) + raise NotImplementedError + end + def process(body) + document = esi.parser.parse read(body) + process_document document + document.send esi.serializer + end + + end +end diff --git a/lib/rack-esi/threaded.rb b/lib/rack-esi/threaded.rb new file mode 100644 index 0000000..4aa2983 --- /dev/null +++ b/lib/rack-esi/threaded.rb @@ -0,0 +1,50 @@ +require 'thread' +#require 'timeout' + +class Rack::ESI + + class Finisher < Proc + def self.wait(queue) + finisher = new do |worker| + puts "Finishing #{ worker.inspect }..." + worker[:finish] = true + queue.push finisher + end + + # cast the first stone + queue.push finisher + + # wait at the end + queue.pop + end + end + + class Worker < Thread + def initialize(queue) + super do + begin + queue.pop[ self ] + rescue => e + puts e + end until key? :finish + end + end + end + + class Processor::Threaded < Processor + def process_document(document) + nodes = document.xpath '//e:*', 'e' => NAMESPACE + + countdown, main = nodes.length, Thread.current + nodes.each do |node| + esi.queue do + process_node node + main.run if (countdown -= 1).zero? 
+ end + end + # TODO prevent nesting depth bigger than poolsize + Thread.stop if countdown > 0 # wait for worker + end + end + +end diff --git a/lib/rack-esi/version.rb b/lib/rack-esi/version.rb new file mode 100644 index 0000000..fd23193 --- /dev/null +++ b/lib/rack-esi/version.rb @@ -0,0 +1,7 @@ +module Rack + class ESI < Struct.new(:app, :options) + + VERSION = "0.2.0" + + end +end diff --git a/rack-esi.gemspec b/rack-esi.gemspec new file mode 100644 index 0000000..3a149d1 --- /dev/null +++ b/rack-esi.gemspec @@ -0,0 +1,28 @@ +# -*- encoding: utf-8 -*- +$:.push File.expand_path("../lib", __FILE__) +require "rack-esi/version" + +Gem::Specification.new do |s| + s.name = "rack-esi" + s.version = Rack::ESI::VERSION + s.authors = ["Florian Aßmann"] + s.email = ["florian.assmann@email.de"] + s.homepage = "" + s.summary = %q{ ESI middleware implementation for Rack. } + s.description = <<-EOF +Rack-ESI is a Nokogiri based ESI middleware implementation for Rack with support for include tags, all other ESI namespaced nodes are just removed. +To make this gem work you must define the (xmlns:esi)[http://www.edge-delivery.org/esi/1.0] namespace in your text/html response. +Note: This gem should only be used in development. For production use setup varnish or any other ESI enabled server. 
+EOF + # s.rubyforge_project = "rack-esi" + + s.files = `git ls-files`.split("\n") + s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") + s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } + s.require_paths = ["lib"] + + s.add_dependency "rack" + s.add_dependency "nokogiri" + # s.add_dependency "patron" + s.add_development_dependency "riot" +end diff --git a/test/_test.rb b/test/_test.rb new file mode 100644 index 0000000..e341953 --- /dev/null +++ b/test/_test.rb @@ -0,0 +1,31 @@ +require File.expand_path('../teststrap', __FILE__) + +context 'Rack::ESI' do + + __dirname__ = File.expand_path File.dirname(__FILE__) + root = Pathname.new File.join(__dirname__, 'fixtures') + opts = { :urls => ['/'], :root => root } + + setup { ESI.new Static.new(App.new, opts), skip: /raw/, :poolsize => 1 } + + context 'GET /raw.html' do + setup { MockRequest.new(topic).get '/raw.html' } + asserts('Content-Type') { topic.content_type }.equals 'text/html' + should('not be altered') { topic.body == root.join('raw.html').read } + end + + context 'GET /index.html' do + setup { MockRequest.new(topic).get '/index.html' } + + asserts('Content-Type') { topic.content_type }.equals 'text/html' + should('not have any ESI specific nodes') do + html(topic.body). + at('//e:*', 'e' => Rack::ESI::Processor::NAMESPACE).nil? + end + should('have meta replacement with content') do + not html(topic.body). + at("//meta[@name='replacement' and @content='content']").nil? 
+ end + end + +end diff --git a/test/rack-esi_test.rb b/test/rack-esi_test.rb index f258810..e341953 100644 --- a/test/rack-esi_test.rb +++ b/test/rack-esi_test.rb @@ -1,4 +1,4 @@ -require 'teststrap' +require File.expand_path('../teststrap', __FILE__) context 'Rack::ESI' do @@ -6,7 +6,7 @@ root = Pathname.new File.join(__dirname__, 'fixtures') opts = { :urls => ['/'], :root => root } - setup { ESI.new Static.new(App.new, opts), :skip => /raw/ } + setup { ESI.new Static.new(App.new, opts), skip: /raw/, :poolsize => 1 } context 'GET /raw.html' do setup { MockRequest.new(topic).get '/raw.html' } @@ -20,7 +20,7 @@ asserts('Content-Type') { topic.content_type }.equals 'text/html' should('not have any ESI specific nodes') do html(topic.body). - at('//esi:include|//esi:remove|//esi:comment', Rack::ESI::NS).nil? + at('//e:*', 'e' => Rack::ESI::Processor::NAMESPACE).nil? end should('have meta replacement with content') do not html(topic.body). diff --git a/test/teststrap.rb b/test/teststrap.rb index 5219f79..f02c16c 100644 --- a/test/teststrap.rb +++ b/test/teststrap.rb @@ -1,8 +1,12 @@ require 'pathname' -require 'rubygems' -require 'riot' -require 'rack-esi' require 'rack/mock' +require 'rack/static' +require 'rack/file' + +require File.expand_path('../../lib/rack-esi', __FILE__) +Bundler.require :development + +Nokogiri def html(body) Nokogiri.HTML(body).root