Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Implemented categorization for text

  • Loading branch information...
commit 1b61014c8908a8485794f836464253b5531138f8 1 parent 6514f83
@dbalatero authored
View
2  lib/alchemy_api.rb
@@ -12,11 +12,13 @@ module AlchemyApi
# Module-level configuration, stored as instance variables on the AlchemyApi
# module object itself and exposed through the singleton accessors below.
# No API key is set by default; callers are expected to assign
# AlchemyApi.api_key before making requests.
@api_key = nil
# Endpoint roots for the API, one per kind of input (url / html / text).
# Request classes append the call name to these,
# e.g. "#{AlchemyApi.base_text_uri}/TextGetCategory".
@base_uri = "http://access.alchemyapi.com/calls/url"
@base_html_uri = "http://access.alchemyapi.com/calls/html"
+ @base_text_uri = "http://access.alchemyapi.com/calls/text"
# Readers and writers for the settings above, defined on the module's
# singleton class so they are called as AlchemyApi.api_key, AlchemyApi.base_uri, etc.
class << self
attr_accessor :api_key
attr_accessor :base_uri
attr_accessor :base_html_uri
+ attr_accessor :base_text_uri
end
# Raised by Base.check_json_for_errors_and_raise! when the API reports an
# error whose statusInfo is not one of the recognized values.
class UnknownError < StandardError; end
View
21 lib/alchemy_api/base.rb
@@ -4,5 +4,26 @@ class Base < MonsterMash::Base
cache_timeout 999999
user_agent 'Ruby AlchemyApi'
end
+
# Inspects a parsed API response and raises if it reports an error.
#
# Defined on Base so that every request class inheriting from it
# (Categorization, TextExtraction) can call it from its response handlers.
#
# Returns nil without raising unless json['status'] is 'ERROR'. Otherwise the
# 'statusInfo' string selects the exception class; any value not listed below
# raises UnknownError with the raw statusInfo in the message.
#
# json - Hash of the parsed response body; the keys read are 'status',
#        'statusInfo' and 'url'.
#
# Raises InvalidApiKeyError, CannotRetrieveUrlError, RedirectionLimitError,
# PageIsNotValidHtmlError, ContentExceedsMaxLimitError, InvalidHtmlError or
# UnknownError.
+ def self.check_json_for_errors_and_raise!(json)
+ if json['status'] == 'ERROR'
+ case json['statusInfo']
+ when 'invalid-api-key'
+ raise InvalidApiKeyError, "The API key you sent (#{AlchemyApi.api_key.inspect}) is invalid! Please set AlchemyApi.api_key!"
# NOTE(review): the messages below interpolate json['url']; for text-based
# calls the API can echo an empty url, which would leave a blank in the message.
+ when 'cannot-retrieve'
+ raise CannotRetrieveUrlError, "The URL (#{json['url']}) could not be retrieved."
+ when 'cannot-retrieve:http-redirect-limit'
+ raise RedirectionLimitError, "The URL (#{json['url']}) could not be retrieved, as it reached a redirect limit."
+ when 'page-is-not-html'
+ raise PageIsNotValidHtmlError, "The page at #{json['url']} is not valid HTML!"
+ when 'content-exceeds-size-limit'
+ raise ContentExceedsMaxLimitError, "The page at #{json['url']} is larger than 600KB!"
+ when 'invalid-html'
+ raise InvalidHtmlError, "The HTML sent was invalid!"
+ else
# Fallback for any error code this client does not know about.
+ raise UnknownError, "Got an unknown error: #{json['statusInfo']}"
+ end
+ end
+ end
end
end
View
22 lib/alchemy_api/categorization.rb
@@ -1,4 +1,24 @@
module AlchemyApi
- class Categorization
# Value object returned by categorization calls: the url echoed back by the
# API, the category name, and the score converted to a Float.
+ Category = Struct.new(:url, :name, :score)
+
# Request class for the text categorization call (TextGetCategory).
+ class Categorization < Base
# Declares a POST request named get_categorization_from_text.
# Presumably the `post` DSL inherited via Base turns this into a class method
# of the same name (the spec calls it that way) - confirm against MonsterMash.
#
# text - the text to categorize.
# args - optional; the first element is treated as an options Hash, of which
#        only :url is read (sent as an empty string when absent).
+ post(:get_categorization_from_text) do |text, *args|
+ options = args.first || {}
+ uri "#{AlchemyApi.base_text_uri}/TextGetCategory"
+ params :apikey => AlchemyApi.api_key,
+ :text => text,
+ :url => options[:url] || '',
+ :outputMode => 'json'
# Response parsing lives in a named class method rather than inline here.
+ handler do |response|
+ AlchemyApi::Categorization.get_categorization_handler(response)
+ end
+ end
+
# Turns a raw response into a Category.
#
# response - object whose #body is the JSON text returned by the API.
#
# Returns a Category built from the 'url', 'category' and 'score' keys.
# Raises whatever check_json_for_errors_and_raise! raises when the body
# reports an error status.
+ def self.get_categorization_handler(response)
+ json = JSON.parse(response.body)
+ check_json_for_errors_and_raise!(json)
# The recorded response carries the score as a JSON string (e.g. "0.841536"),
# hence the to_f conversion.
+ Category.new(json['url'], json['category'],
+ json['score'].to_f)
+ end
end
end
View
23 lib/alchemy_api/text_extraction.rb
@@ -83,7 +83,6 @@ class TextExtraction < Base
end
end
-
def self.get_title_from_url_handler(response)
json = JSON.parse(response.body)
check_json_for_errors_and_raise!(json)
@@ -95,27 +94,5 @@ def self.get_text_from_url_handler(response)
check_json_for_errors_and_raise!(json)
ExtractedText.new(json['url'], json['text'])
end
-
- private
- def self.check_json_for_errors_and_raise!(json)
- if json['status'] == 'ERROR'
- case json['statusInfo']
- when 'invalid-api-key'
- raise InvalidApiKeyError, "The API key you sent (#{AlchemyApi.api_key.inspect}) is invalid! Please set AlchemyApi.api_key!"
- when 'cannot-retrieve'
- raise CannotRetrieveUrlError, "The URL (#{json['url']}) could not be retrieved."
- when 'cannot-retrieve:http-redirect-limit'
- raise RedirectionLimitError, "The URL (#{json['url']}) could not be retrieved, as it reached a redirect limit."
- when 'page-is-not-html'
- raise PageIsNotValidHtmlError, "The page at #{json['url']} is not valid HTML!"
- when 'content-exceeds-size-limit'
- raise ContentExceedsMaxLimitError, "The page at #{json['url']} is larger than 600KB!"
- when 'invalid-html'
- raise InvalidHtmlError, "The HTML sent was invalid!"
- else
- raise UnknownError, "Got an unknown error: #{json['statusInfo']}"
- end
- end
- end
end
end
View
15 spec/alchemy_api/categorization_spec.rb
@@ -1,4 +1,19 @@
require File.dirname(__FILE__) + "/../spec_helper"
# Spec for AlchemyApi::Categorization.get_categorization_from_text.
describe AlchemyApi::Categorization do
# Presumably typhoeus_spec_cache (from spec_helper) replays HTTP responses
# stored under the given directory instead of hitting the network - confirm
# in spec_helper. The yielded hydra is not used in this block.
+ typhoeus_spec_cache('spec/cache/categorization/get_categorization_from_text') do |hydra|
+ describe "#get_categorization_from_text" do
+ before(:each) do
# NOTE(review): @url is assigned but never used; the call below passes no
# :url option, so the request goes out with an empty url.
+ @url = "http://test.com"
+ text = fixture_for('article.txt')
+
+ @category = AlchemyApi::Categorization.
+ get_categorization_from_text(text)
+ end
+
# Only checks that some category name came back, not which one.
+ it "should return a category name" do
+ @category.name.should_not be_nil
+ end
+ end
+ end
end
View
29 ...ategorization/get_categorization_from_text/8b476a3b532afd2da646b145e9dde07570c27352.cache
@@ -0,0 +1,29 @@
+u:Typhoeus::Response�---
+:headers: |
+ HTTP/1.1 100 Continue
+
+ HTTP/1.1 200 OK
+ Server: apgrid
+ Date: Fri, 30 Apr 2010 00:06:04 GMT
+ Content-Type: application/json
+ Connection: keep-alive
+ Content-Length: 328
+ Cache-Control: max-age=600
+ Expires: Fri, 30 Apr 2010 00:16:04 GMT
+
+
+:code: 200
+:requested_http_method:
+:time: 0.302073
+:body: |
+ {
+ "status": "OK",
+ "usage": "By accessing AlchemyAPI or using information generated by AlchemyAPI, you are agreeing to be bound by the AlchemyAPI Terms of Use: http://www.alchemyapi.com/company/terms.html",
+ "url": "",
+ "language": "english",
+ "category": "arts_entertainment",
+ "score": "0.841536"
+ }
+
+:start_time:
+:requested_url:
View
9 spec/fixtures/article.txt
@@ -0,0 +1,9 @@
+Bed and Breakfast locations are trade marked by their small size, antique furniture and homey feel. If this is the kind of B&B you are looking for, then The Custer House is where you should go. It is a five-minute walk from the beach, but in this turn of the century modified Queen Anne residence you'll be begging to stay longer.
+
+She may not look like much on the outside, but visiting 10th Avenue Inn Bed and Breakfast will be worth the visit inside. A stunning panoramic view of the ocean that few can rival cheers guests from this quiet little Inn. Not only is the view pleasant, but the meals and tea are something to look forward to.
+
+The Guest House Bed and Breakfast is the coziest and homiest of the B&B's in Seaside, Oregon. The warm wood paneling of the house is cheerful and clean-looking. A unique feature is the front facing balcony that invites you to enjoy the views day and night.
+
+One cannot mention Seaside B&B's without mentioning The Gilbert Inn Bed and Breakfast. This is the most recommended location for romance and history. This Victorian style B&B right on the Promenade is quaint and scenic in all the best ways. The Turret Room offers a special way to spend your night and morning with a beautiful view of the ocean. This B&B is a must for those visiting Historical sites like the Butterfield Cottage and The Saltworks.
+
+Seaside B&B's are wonderful little getaways for the romantic couple, the history buff and those people who simply enjoy a good old-fashioned vacation.
Please sign in to comment.
Something went wrong with that request. Please try again.