Skip to content

Commit

Permalink
new version of libxml-ruby will break response processor due to nodes…
Browse files Browse the repository at this point in the history
… not belonging to documents. might as well do a single pass
  • Loading branch information
abhay committed Dec 19, 2008
1 parent bafa1c7 commit 6e2c48e
Showing 1 changed file with 62 additions and 91 deletions.
153 changes: 62 additions & 91 deletions lib/calais/response.rb
Expand Up @@ -25,12 +25,6 @@ def initialize(rdf_string)
@categories = []

extract_data

process_relevances
process_entities
process_relations
process_geographies
process_categories
end

class Entity
Expand Down Expand Up @@ -82,75 +76,29 @@ def self.find_or_create(hash, hashes)
private
# Parses @raw_response and sorts the root's rdf:Description children into
# the @nodes hash, detaching every node from the document as it is filed.
#
# For each type key below, the XPath selects the rdf:Description elements
# that have an rdf:type child whose rdf:resource contains MATCHERS[key].
# Whatever is still attached to the root afterwards is stored under :others.
#
# Returns nil; the result is left in @nodes.
def extract_data
  doc = XML::Parser.string(@raw_response).parse

  # Order matters: each query runs against a document from which the
  # previous matches have already been removed.
  type_keys = [
    :docinfo, :docinfometa, :defaultlangid, :doccat, :entities,
    :relations, :geographies, :instances, :relevances
  ]

  @nodes = {}
  type_keys.each do |type_key|
    @nodes[type_key] = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[type_key]}')]/..")
    @nodes[type_key].each { |matched| matched.remove! }
  end

  # Sweep up every remaining child of the root.
  @nodes[:others] = doc.root.find("./*")
  @nodes[:others].each { |leftover| leftover.remove! }

  nil
end

# Builds a Hash of node name => value from the given nodes.
#
# A node carrying a 'resource' attribute is mapped to the object returned by
# CalaisHash.find_or_create for the last '/'-separated segment of that
# attribute; any other node is mapped to its text content. When two nodes
# share a name, the later one wins.
def extract_attributes(nodes)
  nodes.inject({}) do |attributes, attribute_node|
    resource = attribute_node['resource']

    if resource
      # Best effort: a failure while splitting yields a nil key.
      key = resource.split('/')[-1] rescue nil
      value = CalaisHash.find_or_create(key, @hashes)
    else
      value = attribute_node.content
    end

    attributes[attribute_node.name] = value
    attributes
  end
end

# Fills @relevances from the nodes stored under @nodes[:relevances].
#
# The key is the last '/'-separated segment of the c:subject element's
# resource attribute; the value is the c:relevance element's content
# converted with to_f. Returns @relevances.
def process_relevances
  @nodes[:relevances].each do |relevance_node|
    subject_key = relevance_node.find_first("c:subject")[:resource].split('/').last
    score = relevance_node.find_first("c:relevance").content.to_f

    @relevances[subject_key] = score
  end

  @relevances
end

def process_categories
@categories = @nodes[:doccat].map do |node|

doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each { |node| node.remove! }
doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each { |node| node.remove! }
doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:defaultlangid]}')]/..").each { |node| node.remove! }

@categories = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:doccat]}')]/..").map do |node|
category = Category.new
category.name = node.find_first("c:categoryName").content
category.score = node.find_first("c:score").content.to_f


node.remove!
category
end
end

@relevances = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node|
subject_hash = node.find_first("c:subject")[:resource].split('/')[-1]
acc[subject_hash] = node.find_first("c:relevance").content.to_f

node.remove!
acc
end

def process_entities
@entities = @nodes[:entities].map do |node|
@entities = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node|
extracted_hash = node['about'].split('/')[-1] rescue nil

entity = Entity.new
Expand All @@ -160,47 +108,70 @@ def process_entities

relevance = @relevances[extracted_hash]
entity.relevance = relevance if relevance

instance_nodes = @nodes[:instances].select {|n|
n.find_first("c:subject")[:resource].split("/")[-1] == extracted_hash
}

entity.instances = instance_nodes.map {|n| Instance.from_node(n) }


entity.instances = get_instance_nodes(doc, extracted_hash).map do |instance_node|
instance = Instance.from_node(instance_node)
instance_node.remove!

instance
end

node.remove!
entity
end
end

def process_relations
@relations = @nodes[:relations].map do |node|

@relations = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node|
extracted_hash = node['about'].split('/')[-1] rescue nil

relation = Relation.new
relation.hash = CalaisHash.find_or_create(extracted_hash, @hashes)
relation.type = node.find("*[name()='rdf:type']")[0]['resource'].split('/')[-1] rescue nil
relation.attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))

instance_nodes = @nodes[:instances].select {|n|
n.find_first("c:subject")[:resource].split("/")[-1] == extracted_hash
}

relation.instances = instance_nodes.map {|n| Instance.from_node(n) }


relation.instances = get_instance_nodes(doc, extracted_hash).map do |instance_node|
instance = Instance.from_node(instance_node)
instance_node.remove!

instance
end

node.remove!
relation
end
end

def process_geographies
@geographies = @nodes[:geographies].map do |node|

@geographies = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node|
attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))

geography = Geography.new
geography.name = attributes.delete('name')
geography.hash = attributes.delete('subject')
geography.attributes = attributes

node.remove!
geography
end

doc.root.find("./*").each { |node| node.remove! }

return
end

# Returns the instance nodes in +doc+ whose c:subject points at +hash+.
#
# Instance nodes are the root's rdf:Description children that have an
# rdf:type child whose rdf:resource contains MATCHERS[:instances]. A node
# is kept when the last '/'-separated segment of its c:subject resource
# attribute equals +hash+.
def get_instance_nodes(doc, hash)
  candidates = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..")

  candidates.select do |candidate|
    subject = candidate.find_first("c:subject")
    subject[:resource].split("/").last == hash
  end
end

# Builds a Hash of node name => value from the given nodes.
#
# A node carrying a 'resource' attribute is mapped to the object returned by
# CalaisHash.find_or_create for the last '/'-separated segment of that
# attribute; any other node is mapped to its text content. When two nodes
# share a name, the later one wins.
def extract_attributes(nodes)
  nodes.inject({}) do |attributes, attribute_node|
    resource = attribute_node['resource']

    if resource
      # Best effort: a failure while splitting yields a nil key.
      key = resource.split('/')[-1] rescue nil
      value = CalaisHash.find_or_create(key, @hashes)
    else
      value = attribute_node.content
    end

    attributes[attribute_node.name] = value
    attributes
  end
end
end
end

0 comments on commit 6e2c48e

Please sign in to comment.