Skip to content
Browse files

Test Contentizer and implement up to basic post parsing.

  • Loading branch information...
1 parent 81c894b commit 580f801ca473af0416aa141a176b96670c6004e1 @dodecaphonic committed May 26, 2011
Showing with 306 additions and 0 deletions.
  1. +49 −0 .rvmrc
  2. +10 −0 Gemfile
  3. +18 −0 Gemfile.lock
  4. +6 −0 Rakefile
  5. +11 −0 lib/tumble_out.rb
  6. +52 −0 lib/tumble_out/contentizer.rb
  7. +29 −0 lib/tumble_out/post.rb
  8. +82 −0 test/assets/sample.xml
  9. +49 −0 test/unit/test_contentizer.rb
View
49 .rvmrc
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+# This is an RVM Project .rvmrc file, used to automatically load the ruby
+# development environment upon cd'ing into the directory
+
+# First we specify our desired <ruby>[@<gemset>], the @gemset name is optional.
+environment_id="ruby-1.9.2-p180@tumbleout"
+
+#
+# First we attempt to load the desired environment directly from the environment
+# file. This is very fast and efficient compared to running through the entire
+# CLI and selector. If you want feedback on which environment was used then
+# insert the word 'use' after --create as this triggers verbose mode.
+#
+if [[ -d "${rvm_path:-$HOME/.rvm}/environments" \
+ && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]] ; then
+ \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
+
+ [[ -s ".rvm/hooks/after_use" ]] && . ".rvm/hooks/after_use"
+else
+ # If the environment file has not yet been created, use the RVM CLI to select.
+ rvm --create use "$environment_id"
+fi
+
+#
+# If you use an RVM gemset file to install a list of gems (*.gems), you can have
+# it be automatically loaded. Uncomment the following and adjust the filename if
+# necessary.
+#
+# filename=".gems"
+# if [[ -s "$filename" ]] ; then
+# rvm gemset import "$filename" | grep -v already | grep -v listed | grep -v complete | sed '/^$/d'
+# fi
+
+#
+# If you use bundler and would like to run bundle each time you enter the
+# directory, you can uncomment the following code.
+#
+# # Ensure that Bundler is installed. Install it if it is not.
+# if ! command -v bundle >/dev/null; then
+# printf "The rubygem 'bundler' is not installed. Installing it now.\n"
+# gem install bundler
+# fi
+#
+# # Bundle while reducing excess noise.
+# printf "Bundling your gems. This may take a few minutes on a fresh clone.\n"
+# bundle | grep -v '^Using ' | grep -v ' is complete' | sed '/^$/d'
+#
+
View
10 Gemfile
@@ -0,0 +1,10 @@
# Gem dependencies for TumbleOut.
#
# The source is given as an explicit HTTPS URL: the symbol shorthands
# (:gemcutter, :rubygems, :rubyforge) are deprecated by Bundler because they
# resolve to a plain-HTTP endpoint.
source "https://rubygems.org"

# Runtime dependencies.
gem "nokogiri", "~> 1.4.4"
gem "rpeg-markdown", "~> 1.4.6"

# Only needed to run the test suite and rake tasks.
group :development do
  gem "minitest", "~> 2.1.0"
  gem "mocha", "~> 0.9"
  gem "rake", "~> 0.9.0"
end
View
18 Gemfile.lock
@@ -0,0 +1,18 @@
+GEM
+ remote: http://rubygems.org/
+ specs:
+ minitest (2.1.0)
+ mocha (0.9.12)
+ nokogiri (1.4.4)
+ rake (0.9.0)
+ rpeg-markdown (1.4.6)
+
+PLATFORMS
+ ruby
+
+DEPENDENCIES
+ minitest (~> 2.1.0)
+ mocha (~> 0.9)
+ nokogiri (~> 1.4.4)
+ rake (~> 0.9.0)
+ rpeg-markdown (~> 1.4.6)
View
6 Rakefile
@@ -0,0 +1,6 @@
# Rake tasks for the project.
require "bundler/setup"
require "rake/testtask"

# `rake test` runs every file matching test/**/test_*.rb.
Rake::TestTask.new do |t|
  t.pattern = "test/**/test_*.rb"
end

# Let a bare `rake` run the test suite instead of aborting because no
# default task is defined.
task :default => :test
View
11 lib/tumble_out.rb
@@ -0,0 +1,11 @@
+require "bundler/setup"
+
+require "net/http"
+require "nokogiri"
+require "markdown"
+require "fileutils"
+
+$LOAD_PATH.unshift File.dirname(__FILE__)
+
+require "tumble_out/contentizer"
+require "tumble_out/post"
View
52 lib/tumble_out/contentizer.rb
@@ -0,0 +1,52 @@
module TumbleOut
  # Downloads the posts of a Tumblr blog through its XML read endpoint
  # (http://<url>/api/read) and turns each <post> node into a Post.
  class Contentizer
    # @return [String] blog host name, e.g. "sample.tumblr.com"
    attr_reader :url

    # @param url [String] host name of the blog, without scheme or path
    def initialize(url)
      @url = url
      # NOTE(review): @total_posts and @chunk_size are not read anywhere yet;
      # they look like groundwork for paginated fetching.
      @total_posts = 0
      @chunk_size = 50
      @posts = []
      @done = false
    end

    # Returns the parsed posts, fetching them on the first call only.
    #
    # The result is cached: @done is flipped after a successful fetch so that
    # repeated calls neither hit the network again nor append duplicates.
    # If the fetch raises, @done stays false and the next call retries.
    #
    # @return [Array<Post>]
    def posts
      unless @done
        @posts.concat(raw_posts(@posts.size).map { |raw_post| parse(raw_post) })
        @done = true
      end

      @posts
    end

    # Yields every post to the given block.
    #
    # @yieldparam post [Post]
    def each_post(&blk)
      posts.each(&blk)
    end

    # Writes every post below <directory>/<url>/posts, creating the
    # directory tree when needed.
    #
    # @param directory [String] base directory for the dump
    def dump(directory)
      target = File.join(directory, @url, "posts")
      # mkdir_p is a no-op for an existing directory, so no existence check.
      FileUtils.mkdir_p(target)

      posts.each { |post| post.dump(target) }
    end

    private

    # Builds a Post from the attributes of one raw <post> node.
    #
    # @param raw_post [#[]] node exposing "type", "unix-timestamp", "slug"
    #   and "format" through string keys
    # @return [Post]
    def parse(raw_post)
      Post.new(raw_post["type"],
               Time.at(raw_post["unix-timestamp"].to_i),
               raw_post["slug"],
               raw_post["format"])
    end

    # Fetches the read endpoint and returns its <post> nodes.
    #
    # @param offset [Integer] currently unused; the request always asks for
    #   the endpoint's default page. TODO: forward it once paging is added.
    # @return [Enumerable] the matched "post" nodes
    def raw_posts(offset = 0)
      response = Net::HTTP.get(URI.parse("http://#{@url}/api/read"))

      Nokogiri::XML(response).search("post")
    end
  end
end
View
29 lib/tumble_out/post.rb
@@ -0,0 +1,29 @@
module TumbleOut
  # A single blog post that can be written to disk as a .markdown file.
  class Post
    attr_reader :type, :date, :format
    attr_accessor :title, :body, :slug

    # @param type [String] post type as reported by the source
    # @param date [#year, #month, #day] publication date
    # @param slug [String, nil] URL slug; required by #dump
    # @param format [String] body format, "markdown" when omitted
    def initialize(type, date, slug, format = "markdown")
      @type = type
      @date = date
      @format = format
      @title = nil
      @body = nil
      @slug = slug
    end

    # Writes the body to <directory>/<year>-<month>-<day>-<slug>.markdown,
    # replacing any existing file. A nil body produces an empty file.
    #
    # File.open is used instead of Kernel#open so that the path can never be
    # interpreted as a "|command" pipe.
    #
    # @param directory [String] existing directory to write into
    # @raise [StandardError] "Missing slug" when the post has no slug
    def dump(directory)
      full_path = File.join(directory, create_file_name)
      File.open(full_path, "w") { |f| f << @body }
    end

    private

    # Builds the file name from the date and the slug. Month and day are not
    # zero-padded.
    def create_file_name
      raise StandardError, "Missing slug" if @slug.nil?

      "#{@date.year}-#{@date.month}-#{@date.day}-#{@slug}.markdown"
    end
  end
end
View
82 test/assets/sample.xml
@@ -0,0 +1,82 @@
+<tumblr version="1.0">
+ <tumblog name="sample" timezone="US/Eastern" cname="sample.tumblr.com" title="Break this parser">
+ </tumblog>
+ <posts start="0" total="7">
+ <post id="7" url="http://sample.tumblr.com/post/7" url-with-slug="http://52livros.com/post/7/sluggy" type="photo" date-gmt="2011-05-08 15:45:00 GMT" date="Sun, 08 May 2011 11:45:00" unix-timestamp="1304869500" format="markdown" reblog-key="76zsD8ZR" slug="sluggy" width="2592" height="1936">
+ <photo-caption>A really big photo.</photo-caption>
+ <photo-url max-width="1280">
+ http://sample.tumblr.com/photo/1280/1/1/tumblr_lkvvrj3U2s1qaeuyt
+ </photo-url>
+ </post>
+
+ <post id="6" url="http://sample.tumblr.com/post/6" url-with-slug="http://sample.tumblr.com/post/6/berlin-looking-for-a-photo-assistant-as-well-as-a" type="regular" date-gmt="2011-05-18 20:04:32 GMT" date="Wed, 18 May 2011 22:04:32" unix-timestamp="1305749072" format="html" reblog-key="Gnl0K3xe" slug="berlin-looking-for-a-photo-assistant-as-well-as-a">
+ <regular-title>
+ Berlin: Looking for a photo assistant as well as a make-up artist on May 27.
+ </regular-title>
+ <regular-body><p>Interested?</p> <p>Update: FOUND!</p></regular-body>
+ </post>
+
+ <post id="5" url="http://sample.tumblr.com/post/5" url-with-slug="http://sample.tumblr.com/post/5/arcade-fire-feat-david-byrne-speaking-in-tongues" type="audio" date-gmt="2011-05-26 11:56:32 GMT" date="Thu, 26 May 2011 13:56:32" unix-timestamp="1306410992" format="html" reblog-key="QjEdFy9d" slug="arcade-fire-feat-david-byrne-speaking-in-tongues" audio-plays="11601">
+ <audio-caption>
+ <p>Arcade Fire (feat. David Byrne) - Speaking In Tongues</p> <p><i>Source:</i> <a href="http://david-noel.com/post/5830608088" class="tumblr_blog">david-noel</a></p>
+ </audio-caption>
+ <audio-player>
+ <embed type="application/x-shockwave-flash" src="http://assets.tumblr.com/swf/audio_player.swf?audio_file=http://www.tumblr.com/audio_file/5863286892/tumblr_llr1crZHWZ1qz8306&color=FFFFFF&logo=soundcloud" height="27" width="207" quality="best"></embed>
+ </audio-player>
+ <id3-title>
+ Arcade Fire - Speaking in Tongues (feat. David Byrne)
+ </id3-title>
+ </post>
+
+ <post id="4" url="http://sample.tumblr.com/post/4" url-with-slug="http://sample.tumblr.com/post/4/but-we-prefer-berlin-for-startups" type="quote" date-gmt="2011-05-22 18:27:40 GMT" date="Sun, 22 May 2011 20:27:40" unix-timestamp="1306088860" format="html" reblog-key="LsuHYlX3" slug="but-we-prefer-berlin-for-startups">
+ <quote-text>
+ If London feels like a European New York; Berlin feels like a European Portland or an Austin or a Boulder. And perhaps one day a European San Francisco.
+ </quote-text>
+ <quote-source>
+ <a href="http://techcrunch.com/2011/05/22/witn-london-is-gonna-be-pissed-but-we-prefer-berlin-for-startups-tctv/">TechCrunch: London is gonna be pissed, but we prefer Berlin for Startups</a>
+ </quote-source>
+ <tag>Berlin</tag>
+ <tag>startup</tag>
+ </post>
+
+ <post id="3" url="http://sample.tumblr.com/post/3" url-with-slug="http://sample.tumblr.com/post/3/the-tale-of-a-fairy" type="video" date-gmt="2011-05-22 12:42:02 GMT" date="Sun, 22 May 2011 14:42:02" unix-timestamp="1306068122" format="html" reblog-key="Vsrp4fBq" slug="the-tale-of-a-fairy">
+ <video-caption>
+ <p><a href="http://vimeo.com/23579845">Karl Lagerfeld for Chanel Cruise 2012 - The Tale of a Fairy</a></p> <p><i>Models: Amanda Harlech, Kristen McMenamy, Freja Beha, Bianca Balti, Baptiste Giabiconi, Brad Koening, Jake Davies, Mark Vanderloo, Oriol Elcacho, Sebastien Jondeau, Seth Kuhlmann and the film star - Anna Mouglalis </i></p>
+ </video-caption>
+ <video-source>
+ <iframe src="http://player.vimeo.com/video/23579845?title=0&amp;byline=0&amp;portrait=0&amp;color=ffffff" width="700" height="394" frameborder="0"></iframe>
+ </video-source>
+ <video-player>
+ <iframe src="http://player.vimeo.com/video/23579845?title=0&amp;byline=0&amp;portrait=0&amp;color=ffffff" width="400" height="225" frameborder="0"></iframe>
+ </video-player>
+ <video-player max-width="500">
+ <iframe src="http://player.vimeo.com/video/23579845?title=0&amp;byline=0&amp;portrait=0&amp;color=ffffff" width="500" height="281" frameborder="0"></iframe>
+ </video-player>
+ <video-player max-width="250">
+ <iframe src="http://player.vimeo.com/video/23579845?title=0&amp;byline=0&amp;portrait=0&amp;color=ffffff" width="250" height="140" frameborder="0"></iframe>
+ </video-player>
+ </post>
+
+ <post id="2" url="http://tumblr.tumblr.com/post/2" url-with-slug="http://tumblr.tumblr.com/post/2/were-gonna-have-to-rethink-this-whole-every-two" type="answer" date-gmt="2011-05-25 16:31:16 GMT" date="Wed, 25 May 2011 12:31:16" unix-timestamp="1306341076" format="html" reblog-key="vgwc9Q2O" slug="were-gonna-have-to-rethink-this-whole-every-two">
+ <question>
+ We&#039;re gonna have to rethink this whole &quot;Every two years&quot; thing. If we both live to be 80 that&#039;s still less than 30 times. That&#039;s not enough. Nowhere near. And when we&#039;re still taking naked photos at 80 we&#039;re going to (hilariously) make people uncomfortable (and not give a fuck).<br />
+ <br />
+ Every year. At least. When I&#039;m living on the same continent you are once again. We can meet in the middle sometimes if we have too. ;-)
+ </question>
+ <answer>
+ <p>Agreed. Once we share a continent, once a year. AT LEAST.</p> <p>I love you, Jack Scoresby.</p>
+ </answer>
+ </post>
+
+ <post id="1" url="http://tumblr.tumblr.com/post/1" url-with-slug="http://tumblr.tumblr.com/post/1/talkety-talk-talk" type="conversation" date-gmt="2011-05-25 16:31:16 GMT" date="Wed, 25 May 2011 12:31:16" unix-timestamp="1306341076" format="html" reblog-key="vgwc9Q2O" slug="talkety-talk-talk">
+ <conversation-title>And that's what tortures me</conversation-title>
+ <conversation-text>
+ Me: Is this a test? Her: Of things to come? Me: Have things come? Her: I have not.
+ </conversation-text>
+ <conversation>
+ <line name="Me" label="Me:">Is this a test?</line>
+ <line name="Her" label="Her:">Of things to come?</line>
+ <line name="Me" label="Me:">Have things come?</line>
+ <line name="Her" label="Her:">I have not.</line>
+ </conversation>
+ </post>
View
49 test/unit/test_contentizer.rb
@@ -0,0 +1,49 @@
require 'minitest/autorun'
require 'mocha'
require 'tmpdir'

require File.join(File.dirname(__FILE__), "..", "..", "lib", "tumble_out")

# Exercises TumbleOut::Contentizer against the canned API response stored in
# test/assets/sample.xml; Net::HTTP.get is mocked so no network is used.
class TestContentizer < MiniTest::Unit::TestCase
  SAMPLE_PATH = File.join(File.dirname(__FILE__), "..", "assets", "sample.xml")

  def setup
    # Read the fixture into a String so no file handle is left open.
    raw_data = File.read(SAMPLE_PATH)

    Net::HTTP.expects(:get).
      with(URI.parse("http://sample.tumblr.com/api/read")).returns raw_data
    @contentizer = TumbleOut::Contentizer.new("sample.tumblr.com")
  end

  def test_if_number_of_posts_is_correct
    posts = @contentizer.posts

    assert_equal 7, posts.size
  end

  def test_that_post_types_are_of_a_given_count
    types = @contentizer.posts.map { |p| p.type }

    assert_equal 7, types.size
  end

  def test_whether_posts_are_of_specific_types
    valid_types = %w(audio regular video quote photo
                     answer conversation).sort
    post_types = @contentizer.posts.map { |p| p.type }.uniq.sort

    assert_equal valid_types, post_types
  end

  def test_that_dump_creates_a_file_for_each_post
    # A fresh temporary directory keeps files from earlier runs from
    # influencing the count, and is removed when the block exits.
    Dir.mktmpdir do |tmp|
      full_path = File.join(tmp, @contentizer.url, "posts", "*.markdown")

      @contentizer.dump tmp

      assert_equal 7, Dir.glob(full_path).size
    end
  end
end

0 comments on commit 580f801

Please sign in to comment.
Something went wrong with that request. Please try again.