forked from abhay/calais
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
getting the response processor working again
- Loading branch information
Showing
6 changed files
with
317 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
module Calais
  # Wraps a raw Calais RDF/XML response string and exposes the entities,
  # relations and geographies found in it.
  #
  # All parsing happens eagerly in #initialize: the document's
  # rdf:Description nodes are split into groups (see MATCHERS) and the entity,
  # relation and geography groups are converted into plain Ruby objects.
  class Response
    # Substrings searched for in the rdf:resource attribute of a node's
    # rdf:type child. The first matcher (in EXTRACTION_ORDER) that matches a
    # node claims it.
    MATCHERS = {
      :docinfo => 'DocInfo',
      :docinfometa => 'DocInfoMeta',
      :defaultlangid => 'DefaultLangId',
      :doccat => 'DocCat',
      :entities => 'type/em/e',
      :relations => 'type/em/r',
      :geographies => 'type/er',
      :instances => 'type/sys/InstanceInfo',
      :relevances => 'type/sys/RelevanceInfo',
    }

    # Order in which node groups are pulled out of the document. Kept as an
    # explicit list so the result does not depend on Hash iteration order:
    # each pass removes the nodes it matched, so earlier groups win.
    EXTRACTION_ORDER = [
      :docinfo, :docinfometa, :defaultlangid, :doccat,
      :entities, :relations, :geographies, :instances, :relevances
    ].freeze

    attr_accessor :hashes, :entities, :relations, :geographies

    # rdf_string - the response body as a String; it is parsed as XML.
    #
    # Raises whatever the XML parser raises on malformed input.
    def initialize(rdf_string)
      @raw_response = rdf_string

      @hashes = []
      @entities = []
      @relations = []
      @geographies = []

      extract_data
      process_entities
      process_relations
      process_geographies
    end

    # An extracted entity: its shared CalaisHash, the last path segment of its
    # rdf:type resource, and a Hash of its "c:" child values.
    #
    # NOTE(review): the :hash accessor replaces Object#hash, so instances
    # should not be used as Hash keys or passed to methods that rely on it.
    class Entity
      attr_accessor :hash, :type, :attributes
    end

    # An extracted relation; same shape as Entity.
    class Relation
      attr_accessor :hash, :type, :attributes
    end

    # An extracted geography. name and hash are taken from the node's "name"
    # and "subject" attributes; the remaining attributes stay in attributes.
    class Geography
      attr_accessor :name, :hash, :attributes
    end

    # Identity object for a hash string taken from the response, so that every
    # reference to the same value resolves to the same Ruby object.
    class CalaisHash
      attr_accessor :value

      # Returns the entry of +hashes+ whose value equals +hash+, creating and
      # appending a new entry when none exists yet.
      #
      # hash   - the value to look up (may be nil).
      # hashes - Array of CalaisHash acting as the registry; mutated in place.
      def self.find_or_create(hash, hashes)
        # Compare by value. Testing mere truthiness of h.value would hand the
        # first registered entry back for every lookup.
        existing = hashes.find { |h| h.value == hash }
        return existing if existing

        new_hash = new
        new_hash.value = hash
        hashes << new_hash
        new_hash
      end
    end

    private

    # Parses @raw_response and fills @nodes with one node list per MATCHERS
    # key plus :others for whatever is left under the root. Matched nodes are
    # removed from the document so later passes cannot claim them again.
    #
    # NOTE(review): contains() is a substring test, so the 'DocInfo' pass
    # would also claim nodes whose type contains 'DocInfoMeta' -- confirm
    # whether that is intended.
    def extract_data
      doc = XML::Parser.string(@raw_response).parse

      @nodes = {}
      EXTRACTION_ORDER.each do |key|
        @nodes[key] = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[key]}')]/..")
        @nodes[key].each { |node| node.remove! }
      end

      @nodes[:others] = doc.root.find("./*")
      @nodes[:others].each { |node| node.remove! }

      nil
    end

    # Best-effort helper: yields, splits the result on '/' and returns the
    # last segment. Any StandardError raised while producing or splitting the
    # value (for example a missing attribute) results in nil.
    def trailing_segment
      yield.split('/')[-1]
    rescue StandardError
      nil
    end

    # Builds a Hash of node name => value for +nodes+. A node carrying a
    # 'resource' attribute maps to the shared CalaisHash for that resource's
    # last path segment; any other node maps to its text content.
    def extract_attributes(nodes)
      nodes.inject({}) do |attrs, node|
        attrs[node.name] =
          if node['resource']
            CalaisHash.find_or_create(trailing_segment { node['resource'] }, @hashes)
          else
            node.content
          end
        attrs
      end
    end

    # Builds a +klass+ instance (Entity or Relation) from an rdf:Description
    # node: hash from the 'about' attribute, type from the rdf:type child's
    # resource, attributes from the "c:" children.
    def build_typed(klass, node)
      item = klass.new
      item.hash = CalaisHash.find_or_create(trailing_segment { node['about'] }, @hashes)
      item.type = trailing_segment { node.find("*[name()='rdf:type']")[0]['resource'] }
      item.attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
      item
    end

    # Converts the extracted entity nodes into Entity objects.
    def process_entities
      @entities = @nodes[:entities].map { |node| build_typed(Entity, node) }
    end

    # Converts the extracted relation nodes into Relation objects.
    def process_relations
      @relations = @nodes[:relations].map { |node| build_typed(Relation, node) }
    end

    # Converts the extracted geography nodes into Geography objects, lifting
    # the 'name' and 'subject' attributes out of the attribute Hash.
    def process_geographies
      @geographies = @nodes[:geographies].map do |node|
        attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))

        geography = Geography.new
        geography.name = attributes.delete('name')
        geography.hash = attributes.delete('subject')
        geography.attributes = attributes

        geography
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
require File.join(File.dirname(__FILE__), %w[.. helper]) | ||
|
||
# Smoke test: building a Response from the sample payload must not raise.
# The constructor takes an RDF string (it is parsed as XML), not JSON.
describe Calais::Response, :new do
  it 'accepts an rdf string to generate the response object' do
    lambda { Calais::Response.new(SAMPLE_RESPONSE) }.should_not raise_error
  end
end
|
||
|
||
# Verifies the entity, relation and geography lists built from SAMPLE_RESPONSE.
# The response is parsed once and shared by all examples.
describe Calais::Response, :new do
  before(:all) { @response = Calais::Response.new(SAMPLE_RESPONSE) }

  it 'should extract entities' do
    entity_types = @response.entities.map { |entity| entity.type }
    entity_types.sort.uniq.should == %w[City Continent Country IndustryTerm Organization Person ProvinceOrState]
  end

  it 'should extract relations' do
    relation_types = @response.relations.map { |relation| relation.type }
    relation_types.sort.uniq.should == %w[GenericRelations PersonAttributes PersonProfessional Quotation]
  end

  it 'should extract geographies' do
    geography_names = @response.geographies.map { |geography| geography.name }
    geography_names.sort.uniq.should == %w[Australia Hobart,Tasmania,Australia Tasmania,Australia]
  end
end
Oops, something went wrong.