Permalink
Browse files

Merge branch 'master' of git@github.com:halostatue/mime-types

  • Loading branch information...
2 parents adb868e + 54cd419 commit ed8c1a9d87eff4189d919d0cacb939f26a1a7a6d @halostatue committed Feb 28, 2009
Showing with 131 additions and 249 deletions.
  1. +7 −0 .gitignore
  2. +123 −115 Rakefile
  3. +1 −1 lib/mime/types.rb.data
  4. +0 −133 type-lists/get-latest.rb
View
7 .gitignore
@@ -0,0 +1,7 @@
+*.swp
+html
+doc
+pkg
+publish
+coverage
+type-lists
View
238 Rakefile
@@ -42,8 +42,7 @@ Hoe.new PKG_NAME, PKG_VERSION do |p|
p.clean_globs << "coverage"
- p.spec_extras[:extra_rdoc_files] = MANIFEST.grep(/txt$/) -
- ["Manifest.txt"]
+ p.spec_extras[:extra_rdoc_files] = MANIFEST.grep(/txt$/) - ["Manifest.txt"]
end
desc "Build a MIME::Types .tar.gz distribution."
@@ -72,7 +71,7 @@ file PKG_TAR => [ :test ] do |t|
begin
unless File.directory?(File.dirname(t.name))
require 'fileutils'
- File.mkdir_p File.dirname(t.name)
+ FileUtils.mkdir_p File.dirname(t.name)
end
tf = File.open(t.name, 'wb')
gz = Zlib::GzipWriter.new(tf)
@@ -118,140 +117,149 @@ task :build_manifest do |t|
end
desc "Download the current MIME type registrations from IANA."
-task :download_from_iana do |t|
-#!/usr/bin/ruby -w
+task :iana, :save, :destination do |t, args|
+ save_type = args.save || :text
+ save_type = save_type.to_sym
+
+ case save_type
+ when :text, :both, :html
+ nil
+ else
+ raise "Unknown save type provided. Must be one of text, both, or html."
+ end
-require 'rubygems'
-require 'open-uri'
-require 'nokogiri'
-require 'cgi'
-
-class IANAParser
- include Comparable
-
- INDEX = %q(http://www.iana.org/assignments/media-types/)
- CONTACT_PEOPLE = %r{http://www.iana.org/assignments/contact-people.html?#(.*)}
- RFC_EDITOR = %r{http://www.rfc-editor.org/rfc/rfc(\d+).txt}
- IETF_RFC = %r{http://www.ietf.org/rfc/rfc(\d+).txt}
- IETF_RFC_TOOLS = %r{http://tools.ietf.org/html/rfc(\d+)}
-
- class << self
- def load_index
- @types ||= {}
-
- Nokogiri::HTML(open(INDEX) { |f| f.read }).xpath('//p/a').each do |tag|
- href_match = %r{^/assignments/media-types/(.+)/$}.match(tag['href'])
- next if href_match.nil?
- type = href_match.captures[0]
- @types[tag.content] = IANAParser.new(tag.content, type)
+ destination = args.destination || "type-lists"
+
+ require 'open-uri'
+ require 'nokogiri'
+ require 'cgi'
+
+ class IANAParser
+ include Comparable
+
+ INDEX = %q(http://www.iana.org/assignments/media-types/)
+ CONTACT_PEOPLE = %r{http://www.iana.org/assignments/contact-people.html?#(.*)}
+ RFC_EDITOR = %r{http://www.rfc-editor.org/rfc/rfc(\d+).txt}
+ IETF_RFC = %r{http://www.ietf.org/rfc/rfc(\d+).txt}
+ IETF_RFC_TOOLS = %r{http://tools.ietf.org/html/rfc(\d+)}
+
+ class << self
+ def load_index
+ @types ||= {}
+
+ Nokogiri::HTML(open(INDEX) { |f| f.read }).xpath('//p/a').each do |tag|
+ href_match = %r{^/assignments/media-types/(.+)/$}.match(tag['href'])
+ next if href_match.nil?
+ type = href_match.captures[0]
+ @types[tag.content] = IANAParser.new(tag.content, type)
+ end
end
- end
- attr_reader :types
- end
+ attr_reader :types
+ end
- def initialize(name, type)
- @name = name
- @type = type
- @url = File.join(INDEX, @type)
- end
+ def initialize(name, type)
+ @name = name
+ @type = type
+ @url = File.join(INDEX, @type)
+ end
- attr_reader :name
- attr_reader :type
- attr_reader :url
- attr_reader :html
+ attr_reader :name
+ attr_reader :type
+ attr_reader :url
+ attr_reader :html
- def download(name = nil)
- if name
- @html = Nokogiri::HTML(open(name) { |f| f.read })
- else
- @html = Nokogiri::HTML(open(@url) { |f| f.read })
+ def download(name = nil)
+ @html = Nokogiri::HTML(open(name || @url) { |f| f.read })
end
- end
- def save_html
- File.open("#@name.html", "wb") { |w| w.write @html }
- end
+ def save_html
+ File.open("#@name.html", "wb") { |w| w.write @html }
+ end
- def <=>(o)
- self.name <=> o.name
- end
+ def <=>(o)
+ self.name <=> o.name
+ end
- def parse
- nodes = html.xpath("//table//table//tr")
-
- # How many <td> children does the first node have?
- node_count = nodes.first.children.select { |node| node.elem? }.size
-
- @mime_types = nodes.map do |node|
- next if node == nodes.first
- elems = node.children.select { |n| n.elem? }
- next if elems.size.zero?
- raise "size mismatch #{elems.size} != #{node_count}" if node_count != elems.size
-
- case elems.size
- when 3
- subtype_index = 1
- refnode_index = 2
- when 4
- subtype_index = 1
- refnode_index = 3
- else
- raise "Unknown element size."
- end
+ def parse
+ nodes = html.xpath("//table//table//tr")
+
+ # How many <td> children does the first node have?
+ node_count = nodes.first.children.select { |node| node.elem? }.size
+
+ @mime_types = nodes.map do |node|
+ next if node == nodes.first
+ elems = node.children.select { |n| n.elem? }
+ next if elems.size.zero?
+ raise "size mismatch #{elems.size} != #{node_count}" if node_count != elems.size
+
+ case elems.size
+ when 3
+ subtype_index = 1
+ refnode_index = 2
+ when 4
+ subtype_index = 1
+ refnode_index = 3
+ else
+ raise "Unknown element size."
+ end
- subtype = elems[subtype_index].content.chomp.strip
- refnodes = elems[refnode_index].children.select { |n| n.elem? }.map { |ref|
- case ref['href']
- when CONTACT_PEOPLE
- tag = CGI::unescape($1).chomp.strip
- if tag == ref.content
+ subtype = elems[subtype_index].content.chomp.strip
+ refnodes = elems[refnode_index].children.select { |n| n.elem? }.map { |ref|
+ case ref['href']
+ when CONTACT_PEOPLE
+ tag = CGI::unescape($1).chomp.strip
+ if tag == ref.content
"[#{ref.content}]"
- else
+ else
"[#{ref.content}=#{tag}]"
- end
- when RFC_EDITOR, IETF_RFC, IETF_RFC_TOOLS
+ end
+ when RFC_EDITOR, IETF_RFC, IETF_RFC_TOOLS
"RFC#$1"
- when %r{(https?://.*)}
+ when %r{(https?://.*)}
"{#{ref.content}=#$1}"
- else
- ref
- end
- }
- refs = refnodes.join(',')
+ else
+ ref
+ end
+ }
+ refs = refnodes.join(',')
"#@type/#{subtype} 'IANA,#{refs}"
- end.compact
+ end.compact
- @mime_types
- end
+ @mime_types
+ end
- def save_text
- File.open("#@name.txt", "wb") { |w| w.write @mime_types.join("\n") }
+ def save_text
+ File.open("#@name.txt", "wb") { |w| w.write @mime_types.join("\n") }
+ end
end
-end
-puts "Downloading index of MIME types from #{IANAParser::INDEX}."
-IANAParser.load_index
-
-IANAParser.types.values.sort.each do |parser|
- next if parser.name == "example" or parser.name == "mime"
- puts "Downloading #{parser.name} from #{parser.url}"
- parser.download
- puts "Saving #{parser.name}.html"
- parser.save_html
- puts "Parsing #{parser.name}"
- parser.parse
- puts "Saving #{parser.name}.txt"
- parser.save_text
-end
+ puts "Downloading index of MIME types from #{IANAParser::INDEX}."
+ IANAParser.load_index
+
+ require 'fileutils'
+ FileUtils.mkdir_p destination
+ Dir.chdir destination do
+ IANAParser.types.values.sort.each do |parser|
+ next if parser.name == "example" or parser.name == "mime"
+ puts "Downloading #{parser.name} from #{parser.url}"
+ parser.download
+
+ if :html == save_type || :both == save_type
+ puts "Saving #{parser.name}.html"
+ parser.save_html
+ end
+
+ puts "Parsing #{parser.name} HTML"
+ parser.parse
-# foo = IANAParser.types['application']
-# foo.download("application.html")
-# foo.parse
-# foo = IANAParser.types['image']
-# foo.download("image.html")
-# foo.parse
+ if :text == save_type || :both == save_type
+ puts "Saving #{parser.name}.txt"
+ parser.save_text
+ end
+ end
+ end
end
desc "Shows known MIME type sources."
View
2 lib/mime/types.rb.data
@@ -1,4 +1,4 @@
-# vim: ft=ruby enc=utf-8
+# vim: ft=ruby encoding=utf-8
#--
# MIME::Types
# A Ruby implementation of a MIME Types information library. Based in spirit
View
133 type-lists/get-latest.rb
@@ -1,133 +0,0 @@
-#!/usr/bin/ruby -w
-
-require 'rubygems'
-require 'open-uri'
-require 'nokogiri'
-require 'cgi'
-
-class IANAParser
- include Comparable
-
- INDEX = %q(http://www.iana.org/assignments/media-types/)
- CONTACT_PEOPLE = %r{http://www.iana.org/assignments/contact-people.html?#(.*)}
- RFC_EDITOR = %r{http://www.rfc-editor.org/rfc/rfc(\d+).txt}
- IETF_RFC = %r{http://www.ietf.org/rfc/rfc(\d+).txt}
- IETF_RFC_TOOLS = %r{http://tools.ietf.org/html/rfc(\d+)}
-
- class << self
- def load_index
- @types ||= {}
-
- Nokogiri::HTML(open(INDEX) { |f| f.read }).xpath('//p/a').each do |tag|
- href_match = %r{^/assignments/media-types/(.+)/$}.match(tag['href'])
- next if href_match.nil?
- type = href_match.captures[0]
- @types[tag.content] = IANAParser.new(tag.content, type)
- end
- end
-
- attr_reader :types
- end
-
- def initialize(name, type)
- @name = name
- @type = type
- @url = File.join(INDEX, @type)
- end
-
- attr_reader :name
- attr_reader :type
- attr_reader :url
- attr_reader :html
-
- def download(name = nil)
- if name
- @html = Nokogiri::HTML(open(name) { |f| f.read })
- else
- @html = Nokogiri::HTML(open(@url) { |f| f.read })
- end
- end
-
- def save_html
- File.open("#@name.html", "wb") { |w| w.write @html }
- end
-
- def <=>(o)
- self.name <=> o.name
- end
-
- def parse
- nodes = html.xpath("//table//table//tr")
-
- # How many <td> children does the first node have?
- node_count = nodes.first.children.select { |node| node.elem? }.size
-
- @mime_types = nodes.map do |node|
- next if node == nodes.first
- elems = node.children.select { |n| n.elem? }
- next if elems.size.zero?
- raise "size mismatch #{elems.size} != #{node_count}" if node_count != elems.size
-
- case elems.size
- when 3
- subtype_index = 1
- refnode_index = 2
- when 4
- subtype_index = 1
- refnode_index = 3
- else
- raise "Unknown element size."
- end
-
- subtype = elems[subtype_index].content.chomp.strip
- refnodes = elems[refnode_index].children.select { |n| n.elem? }.map { |ref|
- case ref['href']
- when CONTACT_PEOPLE
- tag = CGI::unescape($1).chomp.strip
- if tag == ref.content
- "[#{ref.content}]"
- else
- "[#{ref.content}=#{tag}]"
- end
- when RFC_EDITOR, IETF_RFC, IETF_RFC_TOOLS
- "RFC#$1"
- when %r{(https?://.*)}
- "{#{ref.content}=#$1}"
- else
- ref
- end
- }
- refs = refnodes.join(',')
-
- "#@type/#{subtype} 'IANA,#{refs}"
- end.compact
-
- @mime_types
- end
-
- def save_text
- File.open("#@name.txt", "wb") { |w| w.write @mime_types.join("\n") }
- end
-end
-
-puts "Downloading index of MIME types from #{IANAParser::INDEX}."
-IANAParser.load_index
-
-IANAParser.types.values.sort.each do |parser|
- next if parser.name == "example" or parser.name == "mime"
- puts "Downloading #{parser.name} from #{parser.url}"
- parser.download
- puts "Saving #{parser.name}.html"
- parser.save_html
- puts "Parsing #{parser.name}"
- parser.parse
- puts "Saving #{parser.name}.txt"
- parser.save_text
-end
-
-# foo = IANAParser.types['application']
-# foo.download("application.html")
-# foo.parse
-# foo = IANAParser.types['image']
-# foo.download("image.html")
-# foo.parse

0 comments on commit ed8c1a9

Please sign in to comment.