Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add support for the Concept Tagging API
- Loading branch information
Showing
5 changed files
with
265 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# TODO: add support for linkedData return fields | ||
module AlchemyApi
  # Result of a concept tagging call: the list of Concept structs plus the
  # 'language', 'url' and 'text' fields taken from the JSON response.
  ConceptTaggingResult = Struct.new(:concepts, :language, :url, :source_text)

  # A single tagged concept: its text and its relevance (stored as a Float).
  Concept = Struct.new(:text, :relevance)

  # Client for the AlchemyAPI Concept Tagging calls. Each call takes its
  # primary input (text, URL or HTML) followed by an optional options Hash.
  class ConceptTagging < Base
    # http://www.alchemyapi.com/api/concept/textc.html
    #
    # text - the text to extract concepts from.
    # args - optional Hash as first element:
    #        :max_retrieve     - sent as maxRetrieve (defaults to 10)
    #        :show_source_text - truthy sends showSourceText=1, otherwise 0
    post(:get_concepts_from_text) do |text, *args|
      options = args.first || {}
      uri "#{AlchemyApi.base_text_uri}/TextGetRankedConcepts"
      params :text => text,
             :maxRetrieve => options[:max_retrieve] || 10,
             :linkedData => 0,
             :showSourceText => options[:show_source_text] ? 1 : 0
      handler do |response|
        AlchemyApi::ConceptTagging.get_concepts_handler(response)
      end
    end

    # http://www.alchemyapi.com/api/concept/urls.html
    #
    # url  - the URL of the document to extract concepts from.
    # args - optional Hash as first element:
    #        :max_retrieve     - sent as maxRetrieve (defaults to 10)
    #        :show_source_text - truthy sends showSourceText=1, otherwise 0
    #        :source_text      - sent as sourceText (defaults to 'cleaned_or_raw')
    #        :cquery, :xpath   - passed through unchanged (nil when absent)
    post(:get_concepts_from_url) do |url, *args|
      options = args.first || {}
      uri "#{AlchemyApi.base_uri}/URLGetRankedConcepts"
      params :url => url,
             :maxRetrieve => options[:max_retrieve] || 10,
             :linkedData => 0,
             :showSourceText => options[:show_source_text] ? 1 : 0,
             :sourceText => options[:source_text] || 'cleaned_or_raw',
             :cquery => options[:cquery],
             :xpath => options[:xpath]
      handler do |response|
        AlchemyApi::ConceptTagging.get_concepts_handler(response)
      end
    end

    # http://www.alchemyapi.com/api/concept/htmlc.html
    #
    # html - the HTML document to extract concepts from.
    # args - optional Hash as first element:
    #        :url              - URL the HTML was fetched from; sent only when given
    #        :max_retrieve     - sent as maxRetrieve (defaults to 10)
    #        :show_source_text - truthy sends showSourceText=1, otherwise 0
    #        :source_text      - sent as sourceText (defaults to 'cleaned_or_raw')
    #        :cquery, :xpath   - passed through unchanged (nil when absent)
    post(:get_concepts_from_html) do |html, *args|
      options = args.first || {}
      uri "#{AlchemyApi.base_html_uri}/HTMLGetRankedConcepts"
      request_params = {
        :html => html,
        :maxRetrieve => options[:max_retrieve] || 10,
        :linkedData => 0,
        :showSourceText => options[:show_source_text] ? 1 : 0,
        :sourceText => options[:source_text] || 'cleaned_or_raw',
        :cquery => options[:cquery],
        :xpath => options[:xpath]
      }
      # Forward the caller-supplied :url instead of silently dropping it.
      # It is added only when present so requests made without :url are
      # exactly what they were before.
      request_params[:url] = options[:url] if options[:url]
      params request_params
      handler do |response|
        AlchemyApi::ConceptTagging.get_concepts_handler(response)
      end
    end

    # Builds a ConceptTaggingResult from a raw response.
    #
    # response - the response object handed to the request handler; it is
    #            parsed with get_json.
    #
    # Returns a ConceptTaggingResult whose concepts are Concept structs with
    # the relevance converted to a Float.
    def self.get_concepts_handler(response)
      json = get_json(response)
      # A response without a 'concepts' key yields an empty list rather than
      # raising NoMethodError on nil.
      concepts = (json['concepts'] || []).map do |c|
        Concept.new(c['text'], c['relevance'].to_f)
      end
      ConceptTaggingResult.new(concepts, json['language'], json['url'], json['text'])
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
require File.dirname(__FILE__) + "/../spec_helper" | ||
|
||
# Specs for the three concept tagging calls. Each group runs inside
# typhoeus_spec_cache with its own directory under spec/cache/concept_tagging.
describe AlchemyApi::ConceptTagging do
  typhoeus_spec_cache('spec/cache/concept_tagging/get_concepts_from_text') do |_hydra|
    describe "#get_concepts_from_text" do
      before(:each) do
        text = fixture_for('article.txt')

        @results = AlchemyApi::ConceptTagging.
          get_concepts_from_text(text)
      end

      it "should return at least one concept" do
        @results.concepts.should_not be_empty
      end
    end
  end

  typhoeus_spec_cache('spec/cache/concept_tagging/get_concepts_from_url') do |_hydra|
    describe "#get_concepts_from_url" do
      before(:each) do
        @url = 'http://www.macrumors.com/2010/04/30/apples-discontinuation-of-lala-streaming-music-service-not-likely-leading-to-imminent-launch-of-web-focused-itunes/'
        @results = AlchemyApi::ConceptTagging.
          get_concepts_from_url(@url,
                                :source_text => 'cleaned_or_raw')
      end

      it "should return at least one concept" do
        @results.concepts.should_not be_empty
      end
    end
  end

  typhoeus_spec_cache('spec/cache/concept_tagging/get_concepts_from_html') do |_hydra|
    describe "#get_concepts_from_html" do
      before(:each) do
        @url = "http://www.businessweek.com/news/2010-04-29/bp-spill-may-alter-obama-s-offshore-drilling-plans-update1-.html"
        @html = fixture_for('bp_spill.html')
        @results = AlchemyApi::ConceptTagging.
          get_concepts_from_html(@html, :url => @url,
                                 :source_text => 'cleaned_or_raw')
      end

      it "should return at least one concept" do
        @results.concepts.should_not be_empty
      end
    end
  end
end
61 changes: 61 additions & 0 deletions
61
...che/concept_tagging/get_concepts_from_html/7da223513abcfe2fd3d253fff60894829f858667.cache
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
u:Typhoeus::Response�--- | ||
:requested_http_method: | ||
:headers: | | ||
HTTP/1.1 100 Continue | ||
|
||
HTTP/1.1 200 OK | ||
Server: nginx | ||
Date: Sat, 11 Sep 2010 03:55:18 GMT | ||
Content-Type: application/json | ||
Transfer-Encoding: chunked | ||
Connection: keep-alive | ||
Cache-Control: no-cache | ||
Content-Encoding: gzip | ||
|
||
|
||
:requested_url: | ||
:body: | | ||
{ | ||
"status": "OK", | ||
"usage": "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of Use: http://www.alchemyapi.com/company/terms.html", | ||
"url": "", | ||
"language": "english", | ||
"concepts": [ | ||
{ | ||
"text": "Robert Gibbs", | ||
"relevance": "0.958553" | ||
}, | ||
{ | ||
"text": "Bobby Jindal", | ||
"relevance": "0.856261" | ||
}, | ||
{ | ||
"text": "Barack Obama", | ||
"relevance": "0.854173" | ||
}, | ||
{ | ||
"text": "White House Press Secretary", | ||
"relevance": "0.820978" | ||
}, | ||
{ | ||
"text": "Petroleum", | ||
"relevance": "0.702847" | ||
}, | ||
{ | ||
"text": "Ken Salazar", | ||
"relevance": "0.66043" | ||
}, | ||
{ | ||
"text": "ExxonMobil", | ||
"relevance": "0.621805" | ||
}, | ||
{ | ||
"text": "Gulf of Mexico", | ||
"relevance": "0.590221" | ||
} | ||
] | ||
} | ||
|
||
:code: 200 | ||
:start_time: | ||
:time: 2.276884 |
48 changes: 48 additions & 0 deletions
48
...che/concept_tagging/get_concepts_from_text/3e56901d7f01729b79ef73fc6d34fa97148eb35c.cache
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
u:Typhoeus::Responses--- | ||
:requested_http_method: | ||
:body: | | ||
{ | ||
"status": "OK", | ||
"usage": "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of Use: http://www.alchemyapi.com/company/terms.html", | ||
"url": "", | ||
"language": "english", | ||
"concepts": [ | ||
{ | ||
"text": "Bed and breakfast", | ||
"relevance": "0.961337" | ||
}, | ||
{ | ||
"text": "Hotel", | ||
"relevance": "0.59768" | ||
}, | ||
{ | ||
"text": "Breakfast", | ||
"relevance": "0.536192" | ||
}, | ||
{ | ||
"text": "Inn", | ||
"relevance": "0.535763" | ||
}, | ||
{ | ||
"text": "Lodging", | ||
"relevance": "0.511123" | ||
} | ||
] | ||
} | ||
|
||
:requested_url: | ||
:start_time: | ||
:code: 200 | ||
:time: 0.429581 | ||
:headers: | | ||
HTTP/1.1 100 Continue | ||
|
||
HTTP/1.1 200 OK | ||
Server: nginx | ||
Date: Sat, 11 Sep 2010 03:54:47 GMT | ||
Content-Type: application/json | ||
Connection: keep-alive | ||
Content-Length: 754 | ||
Cache-Control: no-cache | ||
|
||
|
46 changes: 46 additions & 0 deletions
46
...ache/concept_tagging/get_concepts_from_url/488c7dbb9b844b072cb284425b4ca8356b08dd15.cache
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
u:Typhoeus::Response�--- | ||
:requested_http_method: | ||
:headers: | | ||
HTTP/1.1 200 OK | ||
Server: nginx | ||
Date: Sat, 11 Sep 2010 03:55:16 GMT | ||
Content-Type: application/json | ||
Connection: keep-alive | ||
Content-Length: 921 | ||
Cache-Control: no-cache | ||
|
||
|
||
:requested_url: | ||
:body: | | ||
{ | ||
"status": "OK", | ||
"usage": "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of Use: http://www.alchemyapi.com/company/terms.html", | ||
"url": "http://www.macrumors.com/2010/04/30/apples-discontinuation-of-lala-streaming-music-service-not-likely-leading-to-imminent-launch-of-web-focused-itunes/", | ||
"language": "english", | ||
"concepts": [ | ||
{ | ||
"text": "EMI", | ||
"relevance": "0.926603" | ||
}, | ||
{ | ||
"text": "Record label", | ||
"relevance": "0.917238" | ||
}, | ||
{ | ||
"text": "Music industry", | ||
"relevance": "0.754385" | ||
}, | ||
{ | ||
"text": "ITunes Store", | ||
"relevance": "0.734244" | ||
}, | ||
{ | ||
"text": "Streaming media", | ||
"relevance": "0.655383" | ||
} | ||
] | ||
} | ||
|
||
:code: 200 | ||
:start_time: | ||
:time: 0.313919 |