Permalink
Browse files

Added more Entity fields; had an annoying bug with the encoding of the HTML. Added first exception class. Added find method to the HN2JSON module.
  • Loading branch information...
1 parent 6096967 commit 11262414b781ace69159b47617db5181b1e4a4d0 @jcla1 committed Sep 29, 2012
Showing with 137 additions and 35 deletions.
  1. +16 −0 lib/hn2json.rb
  2. +11 −1 lib/hn2json/entity.rb
  3. +4 −0 lib/hn2json/exceptions.rb
  4. +100 −32 lib/hn2json/parser.rb
  5. +5 −1 lib/hn2json/request.rb
  6. +1 −1 lib/hn2json/version.rb
View
@@ -3,12 +3,28 @@
require 'chronic'
module HN2JSON
+ extend HN2JSON
+
autoload :Request, 'hn2json/request'
autoload :Parser, 'hn2json/parser'
autoload :Entity, 'hn2json/entity'
+ autoload :InvalidIdError, 'hn2json/exceptions'
autoload :VERSION, 'hn2json/version'
+
+ def find id
+ check_for_falsy_id id
+ Entity.new id
+ end
+
+ private
+
+ def check_for_falsy_id id
+ if id.class != Fixnum || id < 1
+ raise InvalidIdError, "id must be > 0 and a Fixnum, you passed #{id}"
+ end
+ end
end
View
@@ -35,7 +35,17 @@ def determine_type
end
def get_attrs
- eval("@parser.get_attrs_#{@type.to_s} self")
+ case @type
+ when :post
+ @parser.get_attrs_post self
+ when :comment
+ @parser.get_attrs_comment self
+ when :poll
+ @parser.get_attrs_poll self
+ when :discussion
+ @parser.get_attrs_discussion self
+ when :error
+ end
end
def add_attrs
@@ -0,0 +1,4 @@
+module HN2JSON
+ class InvalidIdError < StandardError
+ end
+end
View
@@ -4,26 +4,31 @@ module HN2JSON
class Parser
def initialize response
- @doc = Nokogiri::HTML::DocumentFragment.parse response.html
+
+ html = response.html
+ html.force_encoding "UTF-8"
+ @doc = Nokogiri::HTML::DocumentFragment.parse html
end
def determine_type
title = @doc.css('.title a')
if title.length < 1
- return :comment
+ if @doc.css('td').length > 7
+ return :comment
+ else
+ return :error
+ end
else
- forms = @doc.css('td form')
- if forms.length === 1
+ td = @doc.css('td')[10]
+
+ if td.css('table').length > 0
return :poll
+ elsif td.content != ''
+ return :discussion
else
- forms = @doc.css('td')[10].css('form')
- if forms.length === 1
- return :post
- else
- return :discussion
- end
+ return :post
end
end
@@ -103,28 +108,91 @@ def get_attrs_post entity
end
end
-# def get_attrs_poll entity
-# entity.add_attrs do |e|
-# e.title =
-# e.fulltext =
-# e.date_posted =
-# e.posted_by =
-# e.votes =
-# e.comments =
-# voting_on =
-# end
-# end
-#
-# def get_attrs_discussion entity
-# entity.add_attrs do |e|
-# e.title =
-# e.fulltext =
-# e.date_posted =
-# e.posted_by =
-# e.comments =
-# e.votes =
-# end
-# end
+ def get_attrs_poll entity
+
+ title = @doc.css('.title a')[0].content
+
+ td = @doc.css('td')[10]
+
+ if td.css('table').length > 0
+ fulltext = ''
+ voting_on = voting_on_from_table td.css('table')[0]
+ else
+ fulltext = td.content
+ voting_on = voting_on_from_table @doc.css('td')[12].css('table')[0]
+ end
+
+
+ entity.add_attrs do |e|
+ e.title = title
+ e.fulltext = fulltext
+ #e.date_posted = date_posted
+ #e.posted_by = posted_by
+ #e.votes = votes
+ #e.comments = comments
+ #e.voting_on = voting_on
+ end
+ end
+
+ def get_attrs_discussion entity
+
+ title = @doc.css('.title a')[0].content
+
+ fulltext = @doc.css('td')[10].content
+
+ subtext = @doc.css('.subtext')[0]
+
+ date_regex = /.*\s(.*\s.*\sago)/
+ ago = date_regex.match(subtext.content)[1]
+ date_posted = Chronic.parse(ago).to_s
+
+ posted_by = subtext.css('a')[0].content
+
+ votes = subtext.css('span')[0].content.to_i
+
+ comments = []
+
+ full_comments = @doc.css('td > img[width="0"]').xpath("..").xpath("..").css('.default')
+
+ full_comments.each do |comment|
+ comment_id = comment.css('span a')[1]['href'].gsub("item?id=", '')
+ comments.push comment_id
+ end
+
+ entity.add_attrs do |e|
+ e.title = title
+ e.fulltext = fulltext
+ e.date_posted = date_posted
+ e.posted_by = posted_by
+ e.comments = comments
+ e.votes = votes
+ end
+ end
+
+ def voting_on_from_table table
+ trs = table.css('tr')
+
+ voting_on = []
+
+ (trs.length / 3).times do
+ voting_on.push []
+ end
+
+ i = 0
+ while i <= trs.length
+ if i + 1 % 3 != 0
+ if i % 2 == 0
+ puts i % 3
+ voting_on[(i - 1) % 3].push trs[i].css('.comment > div > font')[0].content
+ else
+ voting_on[i % 3].push trs[i].css('.default > .comhead > span')[0].content.gsub(/\spoints?/, '')
+ end
+ end
+ i += 1
+ end
+
+ return []
+ end
end
View
@@ -13,7 +13,11 @@ def initialize id
private
def request_page
- @html = RestClient.get @complete_url
+ begin
+ @html = RestClient.get @complete_url
+ rescue Exception
+ @html = ""
+ end
end
end
View
@@ -1,3 +1,3 @@
module HN2JSON
- VERSION = '0.0.1'
+ VERSION = '0.0.2'
end

0 comments on commit 1126241

Please sign in to comment.