Skip to content
This repository has been archived by the owner on Dec 22, 2021. It is now read-only.

Commit

Permalink
added support for Comment and Post entities. This is more complicated…
Browse files Browse the repository at this point in the history
… than I thought
  • Loading branch information
jcla1 committed Sep 16, 2012
1 parent 78b5230 commit 6096967
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 4 deletions.
3 changes: 2 additions & 1 deletion Gemfile
@@ -1,4 +1,5 @@
source "http://rubygems.org"

gem "rest-client", "~> 1.6.7"
gem "nokogiri", "~> 1.5.5"
gem "nokogiri", "~> 1.5.5"
gem "chronic", "~> 0.7.0"
1 change: 1 addition & 0 deletions hn2json.gemspec
Expand Up @@ -15,6 +15,7 @@ Gem::Specification.new do |s|

s.add_dependency "rest-client", "~> 1.6.7"
s.add_dependency "nokogiri", "~> 1.5.5"
s.add_dependency "chronic", "~> 0.7.0"

#s.files += Dir.glob("bin/**/*")
#s.executables = %w( hn2json )
Expand Down
1 change: 1 addition & 0 deletions lib/hn2json.rb
@@ -1,5 +1,6 @@
require 'rest-client'
require 'nokogiri'
require 'chronic'

module HN2JSON
autoload :Request, 'hn2json/request'
Expand Down
14 changes: 12 additions & 2 deletions lib/hn2json/entity.rb
Expand Up @@ -3,7 +3,7 @@ module HN2JSON
class Entity

attr_accessor :type, :id, :parent, :url, :title, :comments, :votes
attr_accessor :full_text, :posted_by, :date_posted, :voting_on
attr_accessor :fulltext, :posted_by, :date_posted, :voting_on

def initialize id
@id = id
Expand All @@ -12,7 +12,7 @@ def initialize id
@parent = nil
@url = nil
@title = nil
@full_text = nil
@fulltext = nil
@posted_by = nil
@date_posted = nil
@voting_on = nil
Expand All @@ -21,6 +21,8 @@ def initialize id

get_page
determine_type

get_attrs
end

def get_page
Expand All @@ -32,6 +34,14 @@ def determine_type
@type = @parser.determine_type
end

# Asks the parser to fill in this entity's attributes by calling the
# parser method named after the detected type (get_attrs_<type>) with
# self as the only argument.
#
# The method name is dispatched with public_send rather than eval, so the
# value of @type is only ever treated as part of a method name and never
# executed as Ruby code.
#
# Returns whatever the parser method returns.
# Raises NoMethodError when the parser has no public method for @type.
def get_attrs
  @parser.public_send("get_attrs_#{@type}", self)
end

# Hands this object to the caller's block so the block can assign
# attributes on it through the accessors.
#
# Returns the value of the block.
def add_attrs
  yield(self)
end

end

end
97 changes: 97 additions & 0 deletions lib/hn2json/parser.rb
Expand Up @@ -29,6 +29,103 @@ def determine_type

end

# Reads the attributes of a comment page out of @doc and assigns them to
# entity through entity.add_attrs.
#
# Assigned values:
#   parent      - id captured from the href of the third '.comhead a' link
#   fulltext    - markup of the first '.comment' node with every link
#                 replaced by its href and all remaining tags removed
#   date_posted - string form of Chronic.parse applied to the "... ago"
#                 phrase found in the first '.comhead' node
#   comments    - ids of the replies listed on the page
#   posted_by   - text of the first link inside the first '.comhead' node
#
# entity - object whose add_attrs yields an object exposing the setters
#          used below.
def get_attrs_comment(entity)
  parent_url = @doc.css('.comhead a')[2]['href']
  parent = /id=(.*)/.match(parent_url)[1]

  markup = @doc.css('.comment')[0].to_s
  # Each anchor is matched on its own: negated character classes and a lazy
  # body keep one match from swallowing several links that share a line.
  link_regex = %r{<a\shref=['"]([^'"]*)['"][^>]*rel=[^>]*>.*?</a>}m
  fulltext = markup.gsub(link_regex, '\1').gsub(/<\/?[^>]*>/, '')

  comhead = @doc.css('.comhead')[0]

  # Captures the trailing "<amount> <unit> ago" phrase of the header text.
  ago = /.*\s(.*\s.*\sago)/.match(comhead.content)[1]
  date_posted = Chronic.parse(ago).to_s

  posted_by = comhead.css('a')[0].content

  # Walk up two levels from each zero-width spacer image and collect the
  # '.default' cells found there; the second 'span a' link of each cell
  # holds that reply's "item?id=<id>" href.
  reply_cells = @doc.css('td > img[width="0"]').xpath('..').xpath('..').css('.default')
  comments = reply_cells.map do |reply|
    reply.css('span a')[1]['href'].gsub('item?id=', '')
  end

  entity.add_attrs do |e|
    e.parent = parent
    e.fulltext = fulltext
    e.date_posted = date_posted
    e.comments = comments
    e.posted_by = posted_by
  end
end


# Reads the attributes of a post page out of @doc and assigns them to
# entity through entity.add_attrs: url, title, date_posted, comments,
# votes and posted_by. All values are computed before add_attrs is called.
#
# entity - object whose add_attrs yields an object exposing the setters
#          used below.
def get_attrs_post(entity)
  byline = @doc.css('.subtext')[0]

  # Trailing "<amount> <unit> ago" phrase of the byline, parsed by Chronic.
  relative_age = /.*\s(.*\s.*\sago)/.match(byline.content)[1]
  posted_at = Chronic.parse(relative_age).to_s

  author = byline.css('a')[0].content
  score = byline.css('span')[0].content.to_i

  # Two levels up from each zero-width spacer image, the '.default' cells
  # are collected; the second 'span a' link of each carries "item?id=<id>".
  reply_cells = @doc.css('td > img[width="0"]').xpath("..").xpath("..").css('.default')
  reply_ids = reply_cells.map do |cell|
    cell.css('span a')[1]['href'].gsub("item?id=", '')
  end

  headline = @doc.css('.title a')[0]
  headline_text = headline.content
  headline_href = headline['href']

  entity.add_attrs do |target|
    target.url = headline_href
    target.title = headline_text
    target.date_posted = posted_at
    target.comments = reply_ids
    target.votes = score
    target.posted_by = author
  end
end

# def get_attrs_poll entity
# entity.add_attrs do |e|
# e.title =
# e.fulltext =
# e.date_posted =
# e.posted_by =
# e.votes =
# e.comments =
# e.voting_on =
# end
# end
#
# def get_attrs_discussion entity
# entity.add_attrs do |e|
# e.title =
# e.fulltext =
# e.date_posted =
# e.posted_by =
# e.comments =
# e.votes =
# end
# end

end

end
2 changes: 1 addition & 1 deletion lib/hn2json/version.rb
@@ -1,3 +1,3 @@
module HN2JSON
VERSION = '0.0.0'
VERSION = '0.0.1'
end

0 comments on commit 6096967

Please sign in to comment.