Skip to content

Commit

Permalink
getting the response processor working again
Browse files Browse the repository at this point in the history
  • Loading branch information
abhay committed Nov 10, 2008
1 parent 55ec8d7 commit b5a1549
Show file tree
Hide file tree
Showing 6 changed files with 317 additions and 2 deletions.
9 changes: 8 additions & 1 deletion README.txt
Expand Up @@ -6,7 +6,8 @@ A Ruby interface to the Open Calais Web Service (http://opencalais.com)
* Accepts documents in text/plain, text/xml and text/html format.
* Basic access to the Open Calais API's Enlighten action.
* Output is RDF representation of input document.

* Single-function ability to extract names, entities and geographies from a given text.

== Synopsis

This is a very basic wrapper to the Open Calais API. It uses the POST endpoint and currently supports the Enlighten action. Here's a simple call:
Expand All @@ -15,6 +16,12 @@ This is a very basic wrapper to the Open Calais API. It uses the POST endpoint a

This is the easiest way to get the RDF-formatted response from the OpenCalais service.

If you want to go further and extract structured information (entities, relations and geographies) from a document, you can try this:

Calais.process_document(:content => "The government of the United Kingdom has given corporations like fast food chain McDonald's the right to award high school qualifications to employees who complete a company training program.", :content_type => :text, :license_id => LICENSE_ID)

This will return an object containing information extracted from the RDF response.

== Requirements

* Ruby 1.8.5 or better
Expand Down
8 changes: 8 additions & 0 deletions lib/calais.rb
Expand Up @@ -6,6 +6,7 @@

require 'rubygems'
require 'xml/libxml'
require 'json'
require 'curb'

$KCODE = "UTF8"
Expand All @@ -14,6 +15,7 @@
$:.unshift File.expand_path(File.dirname(__FILE__)) + '/calais'

require 'client'
require 'response'

module Calais
REST_ENDPOINT = "http://api.opencalais.com/enlighten/rest/"
Expand Down Expand Up @@ -43,6 +45,12 @@ module Calais

class << self
  # Builds a Client from the given arguments (and optional block) and
  # returns whatever Client#enlighten returns.
  def enlighten(*args, &block)
    Client.new(*args, &block).enlighten
  end

  # Builds a Client from the given arguments (and optional block), forces
  # its output format to :rdf, runs enlighten and wraps the result in a
  # Response object.
  def process_document(*args, &block)
    rdf_client = Client.new(*args, &block)
    rdf_client.output_format = :rdf
    rdf_payload = rdf_client.enlighten
    Response.new(rdf_payload)
  end
end
end

Expand Down
149 changes: 149 additions & 0 deletions lib/calais/response.rb
@@ -0,0 +1,149 @@
module Calais
class Response
# Substrings used to classify the rdf:Description elements of a response.
# extract_data tests each value with XPath contains() against the
# rdf:resource attribute of the element's rdf:type child.
# Frozen so this shared lookup table cannot be modified by accident.
MATCHERS = {
  :docinfo       => 'DocInfo',
  :docinfometa   => 'DocInfoMeta',
  :defaultlangid => 'DefaultLangId',
  :doccat        => 'DocCat',
  :entities      => 'type/em/e',
  :relations     => 'type/em/r',
  :geographies   => 'type/er',
  :instances     => 'type/sys/InstanceInfo',
  :relevances    => 'type/sys/RelevanceInfo',
}.freeze

attr_accessor :hashes, :entities, :relations, :geographies

# Keeps the raw RDF string, starts with empty result collections, then
# parses the document and fills entities, relations and geographies.
#
# rdf_string - the RDF/XML text to parse (handed to the XML parser by
#              extract_data).
def initialize(rdf_string)
  @raw_response = rdf_string
  @hashes, @entities, @relations, @geographies = [], [], [], []

  # extract_data must run first: the process_* steps read from @nodes.
  extract_data

  process_entities
  process_relations
  process_geographies
end

# Plain value holder for one extracted entity. process_entities fills in
# hash (a CalaisHash), type (last path segment of the rdf:type resource)
# and attributes (a Hash built by extract_attributes).
#
# NOTE(review): the hash accessor replaces Object#hash on instances, so
# they are presumably unsafe as Hash keys or with uniq - confirm no caller
# relies on that.
class Entity
  attr_accessor :hash
  attr_accessor :type
  attr_accessor :attributes
end

# Plain value holder for one extracted relation. process_relations fills
# in hash (a CalaisHash), type (last path segment of the rdf:type
# resource) and attributes (a Hash built by extract_attributes).
#
# NOTE(review): the hash accessor replaces Object#hash on instances, so
# they are presumably unsafe as Hash keys or with uniq - confirm no caller
# relies on that.
class Relation
  attr_accessor :hash
  attr_accessor :type
  attr_accessor :attributes
end

# Plain value holder for one extracted geography. process_geographies
# sets name and hash from the 'name' and 'subject' attributes and stores
# the remaining attributes in attributes.
#
# NOTE(review): the hash accessor replaces Object#hash on instances, so
# they are presumably unsafe as Hash keys or with uniq - confirm no caller
# relies on that.
class Geography
  attr_accessor :name
  attr_accessor :hash
  attr_accessor :attributes
end

# Wraps a single hash identifier string so that every object referring to
# the same identifier can share one CalaisHash instance.
class CalaisHash
  attr_accessor :value

  # Looks up the CalaisHash in +hashes+ whose value equals +hash+; when
  # there is none, creates one, appends it to +hashes+ and returns it.
  #
  # hash   - the identifier to look up (may be nil when extraction failed).
  # hashes - Array of CalaisHash instances; mutated when a new entry is
  #          created.
  #
  # Returns the matching or newly created CalaisHash.
  def self.find_or_create(hash, hashes)
    # Compare against the requested identifier. Testing only the
    # truthiness of h.value would return the first stored entry for every
    # lookup.
    existing = hashes.find { |h| h.value == hash }
    return existing if existing

    created = new
    created.value = hash
    hashes << created
    created
  end
end

private
# Parses @raw_response and sorts the children of the document root into
# @nodes, keyed by category. For each category, the rdf:Description
# elements whose rdf:type resource contains the category's MATCHERS
# substring are collected and then removed from the document; whatever is
# left under the root afterwards is stored (and removed) as :others.
#
# Returns nil.
def extract_data
  doc = XML::Parser.string(@raw_response).parse

  @nodes = {}

  # Categories are processed in this fixed order because matched nodes are
  # removed before the next category is searched.
  # NOTE(review): matching is by substring, so the 'DocInfo' pattern also
  # matches resources containing 'DocInfoMeta'; :docinfometa may therefore
  # end up empty - confirm against a real response.
  ordered_kinds = [
    :docinfo, :docinfometa, :defaultlangid, :doccat,
    :entities, :relations, :geographies, :instances, :relevances
  ]

  ordered_kinds.each do |kind|
    xpath = "rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[kind]}')]/.."
    @nodes[kind] = doc.root.find(xpath)
    @nodes[kind].each { |node| node.remove! }
  end

  @nodes[:others] = doc.root.find("./*")
  @nodes[:others].each { |node| node.remove! }

  nil
end

# Builds a Hash of attribute name => value from the given nodes.
#
# A node that has a 'resource' attribute is treated as a reference: the
# last '/'-separated segment of that attribute is resolved to a shared
# CalaisHash via @hashes. Any other node contributes its text content.
#
# nodes - an enumerable of nodes responding to [], name and content.
#
# Returns the Hash; a later node with the same name overwrites an earlier
# one.
def extract_attributes(nodes)
  collected = {}

  nodes.each do |node|
    entry =
      if node['resource']
        hash_id = node['resource'].split('/')[-1] rescue nil
        CalaisHash.find_or_create(hash_id, @hashes)
      else
        node.content
      end

    collected[node.name] = entry
  end

  collected
end

# Converts every node in @nodes[:entities] into an Entity and stores the
# resulting Array in @entities.
#
# For each node: the last '/'-segment of its 'about' attribute becomes the
# (shared) CalaisHash, the last '/'-segment of its rdf:type resource
# becomes the type, and its child elements whose name contains 'c:' become
# the attributes. A missing 'about' or rdf:type yields nil, not an error.
#
# Returns the Array assigned to @entities.
def process_entities
  built = []

  @nodes[:entities].each do |description|
    hash_id = begin
      description['about'].split('/')[-1]
    rescue
      nil
    end

    entity = Entity.new
    entity.hash = CalaisHash.find_or_create(hash_id, @hashes)

    type_name = begin
      description.find("*[name()='rdf:type']")[0]['resource'].split('/')[-1]
    rescue
      nil
    end
    entity.type = type_name

    entity.attributes = extract_attributes(description.find("*[contains(name(), 'c:')]"))

    built << entity
  end

  @entities = built
end

# Converts every node in @nodes[:relations] into a Relation and stores the
# resulting Array in @relations.
#
# For each node: the last '/'-segment of its 'about' attribute becomes the
# (shared) CalaisHash, the last '/'-segment of its rdf:type resource
# becomes the type, and its child elements whose name contains 'c:' become
# the attributes. A missing 'about' or rdf:type yields nil, not an error.
#
# Returns the Array assigned to @relations.
def process_relations
  built = []

  @nodes[:relations].each do |description|
    hash_id = begin
      description['about'].split('/')[-1]
    rescue
      nil
    end

    relation = Relation.new
    relation.hash = CalaisHash.find_or_create(hash_id, @hashes)

    type_name = begin
      description.find("*[name()='rdf:type']")[0]['resource'].split('/')[-1]
    rescue
      nil
    end
    relation.type = type_name

    relation.attributes = extract_attributes(description.find("*[contains(name(), 'c:')]"))

    built << relation
  end

  @relations = built
end

# Converts every node in @nodes[:geographies] into a Geography and stores
# the resulting Array in @geographies.
#
# The node's child elements whose name contains 'c:' are turned into an
# attribute Hash; 'name' and 'subject' are pulled out of it into the
# Geography's name and hash, and the rest is kept as its attributes.
#
# Returns the Array assigned to @geographies.
def process_geographies
  built = []

  @nodes[:geographies].each do |description|
    details = extract_attributes(description.find("*[contains(name(), 'c:')]"))

    place = Geography.new
    place.name = details.delete('name')
    place.hash = details.delete('subject')
    place.attributes = details

    built << place
  end

  @geographies = built
end
end
end
29 changes: 29 additions & 0 deletions spec/calais/response_spec.rb
@@ -0,0 +1,29 @@
require File.join(File.dirname(__FILE__), %w[.. helper])

# Smoke test: building a Response from the sample document must not raise.
# Response#initialize takes an RDF string and parses it as XML, so the
# example description says "rdf" rather than "json".
describe Calais::Response, :new do
  it 'accepts an rdf string to generate the response object' do
    lambda { Calais::Response.new(SAMPLE_RESPONSE) }.should_not raise_error
  end
end


# Checks that a Response built once from SAMPLE_RESPONSE exposes the
# expected entity types, relation types and geography names.
describe Calais::Response, :new do
  before :all do
    @response = Calais::Response.new(SAMPLE_RESPONSE)
  end

  it 'should extract entities' do
    entity_types = @response.entities.map { |entity| entity.type }
    entity_types.sort.uniq.should == %w[City Continent Country IndustryTerm Organization Person ProvinceOrState]
  end

  it 'should extract relations' do
    relation_types = @response.relations.map { |relation| relation.type }
    relation_types.sort.uniq.should == %w[GenericRelations PersonAttributes PersonProfessional Quotation]
  end

  it 'should extract geographies' do
    geography_names = @response.geographies.map { |geography| geography.name }
    geography_names.sort.uniq.should == %w[Australia Hobart,Tasmania,Australia Tasmania,Australia]
  end
end

0 comments on commit b5a1549

Please sign in to comment.