From 0108e6db2eb5d1093e431b69ce96771ea39039f8 Mon Sep 17 00:00:00 2001 From: Mike Moore Date: Mon, 15 Aug 2016 19:28:21 -0600 Subject: [PATCH 1/9] Language setup Call annotation and get response object modeled out. Model Document to hold text vs. html, and content vs. GSC url. --- google-cloud-language/Rakefile | 15 +- .../acceptance/language/language_test.rb | 86 +++++++ .../acceptance/language_helper.rb | 68 +++++ .../lib/google-cloud-language.rb | 4 +- .../lib/google/cloud/language/annotation.rb | 235 ++++++++++++++++++ .../lib/google/cloud/language/credentials.rb | 9 +- .../lib/google/cloud/language/document.rb | 84 +++++++ .../lib/google/cloud/language/project.rb | 96 +++++++ .../lib/google/cloud/language/service.rb | 51 +++- 9 files changed, 632 insertions(+), 16 deletions(-) create mode 100644 google-cloud-language/acceptance/language/language_test.rb create mode 100644 google-cloud-language/acceptance/language_helper.rb create mode 100644 google-cloud-language/lib/google/cloud/language/annotation.rb create mode 100644 google-cloud-language/lib/google/cloud/language/document.rb diff --git a/google-cloud-language/Rakefile b/google-cloud-language/Rakefile index 3e0427b75525..83629a930d4d 100644 --- a/google-cloud-language/Rakefile +++ b/google-cloud-language/Rakefile @@ -25,7 +25,20 @@ end # Acceptance tests desc "Runs the language acceptance tests." -task :acceptance do +task :acceptance, :project, :keyfile do |t, args| + project = args[:project] + project ||= ENV["GCLOUD_TEST_PROJECT"] || ENV["LANGUAGE_TEST_PROJECT"] + keyfile = args[:keyfile] + keyfile ||= ENV["GCLOUD_TEST_KEYFILE"] || ENV["LANGUAGE_TEST_KEYFILE"] + if project.nil? || keyfile.nil? + fail "You must provide a project and keyfile. e.g. 
rake acceptance[test123, /path/to/keyfile.json] or LANGUAGE_TEST_PROJECT=test123 LANGUAGE_TEST_KEYFILE=/path/to/keyfile.json rake acceptance" + end + # always overwrite when running tests + ENV["LANGUAGE_PROJECT"] = project + ENV["LANGUAGE_KEYFILE"] = keyfile + + $LOAD_PATH.unshift "lib", "acceptance" + Dir.glob("acceptance/**/*_test.rb").each { |file| require_relative file } end namespace :acceptance do diff --git a/google-cloud-language/acceptance/language/language_test.rb b/google-cloud-language/acceptance/language/language_test.rb new file mode 100644 index 000000000000..f77774e5a239 --- /dev/null +++ b/google-cloud-language/acceptance/language/language_test.rb @@ -0,0 +1,86 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "language_helper" + +describe "Language", :language do + let(:hello) { "Hello from Chris and Mike!" } + let(:sayhi) { "If you find yourself in Utah, come say hi!" } + let(:ruby) { "We love ruby and writing code." 
} + let(:content) { "#{hello} #{sayhi} #{ruby}" } + + it "annotation without creating a document" do + annotation = language.annotate content + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_equal 1.0 + annotation.sentiment.magnitude.must_equal 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "annotation with creating a document" do + doc = language.document content + + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + 
annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end +end diff --git a/google-cloud-language/acceptance/language_helper.rb b/google-cloud-language/acceptance/language_helper.rb new file mode 100644 index 000000000000..84d776ee72c6 --- /dev/null +++ b/google-cloud-language/acceptance/language_helper.rb @@ -0,0 +1,68 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +gem "minitest" +require "minitest/autorun" +require "minitest/focus" +require "minitest/rg" +require "google/cloud/language" + +# Create shared language object so we don't create new for each test +$language = Google::Cloud.language retries: 10 + +module Acceptance + ## + # Test class for running against a Language instance. + # Ensures that there is an active connection for the tests to use. + # + # This class can be used with the spec DSL. 
+ # To do so, add :language to describe: + # + # describe "My Language Test", :language do + # it "does a thing" do + # your.code.must_be :thing? + # end + # end + class LanguageTest < Minitest::Test + attr_accessor :language + + ## + # Setup project based on available ENV variables + def setup + @language = $language + + refute_nil @language, "You do not have an active language to run the tests." + + super + end + + # Add spec DSL + extend Minitest::Spec::DSL + + # Register this spec type for when :language is used. + register_spec_type(self) do |desc, *addl| + addl.include? :language + end + end + + def self.run_one_method klass, method_name, reporter + result = nil + (1..3).each do |try| + result = Minitest.run_one_method(klass, method_name) + break if (result.passed? || result.skipped?) + puts "Retrying #{klass}##{method_name} (#{try})" + end + reporter.record result + end +end diff --git a/google-cloud-language/lib/google-cloud-language.rb b/google-cloud-language/lib/google-cloud-language.rb index 0893811e47b3..c30a28c98e1b 100644 --- a/google-cloud-language/lib/google-cloud-language.rb +++ b/google-cloud-language/lib/google-cloud-language.rb @@ -37,7 +37,7 @@ module Cloud # # The default scope is: # - # * `https://www.googleapis.com/auth/language` + # * `"https://www.googleapis.com/auth/cloud-platform"` # @param [Integer] retries Number of times to retry requests on server # error. The default value is `3`. Optional. # @param [Integer] timeout Default timeout to use in requests. Optional. @@ -83,7 +83,7 @@ def language scope: nil, retries: nil, timeout: nil # # The default scope is: # - # * `https://www.googleapis.com/auth/language` + # * `"https://www.googleapis.com/auth/cloud-platform"` # @param [Integer] retries Number of times to retry requests on server # error. The default value is `3`. Optional. # @param [Integer] timeout Default timeout to use in requests. Optional. 
diff --git a/google-cloud-language/lib/google/cloud/language/annotation.rb b/google-cloud-language/lib/google/cloud/language/annotation.rb new file mode 100644 index 000000000000..563740dfd5da --- /dev/null +++ b/google-cloud-language/lib/google/cloud/language/annotation.rb @@ -0,0 +1,235 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +require "google/cloud/core/grpc_utils" + +module Google + module Cloud + module Language + ## + # # Annotation + # + # The results of all requested document annotations. + # + # See {Project#annotate} and {Document#annotate}. + # + # @example + # require "google/cloud" + # + # gcloud = Google::Cloud.new + # language = gcloud.language + # + # doc = language.document "Hello world!" + # + # annotation = language.annotate doc + # annotation.thing #=> Some Result + # + class Annotation + ## + # @private The AnnotateTextResponse Google API Client object. + attr_accessor :grpc + + ## + # @private Creates a new Annotation instance. + def initialize + @grpc = nil + end + + def sentences + @sentences ||= begin + Array(grpc.sentences).map { |g| TextSpan.from_grpc g.text } + end + end + + def tokens + @tokens ||= Array(grpc.tokens).map { |g| Token.from_grpc g } + end + + def entities + @entities ||= Entities.from_grpc @grpc + end + + def sentiment + return nil if @grpc.document_sentiment.nil? 
+ @sentiment ||= Sentiment.from_grpc @grpc + end + + def language + @grpc.language + end + + ## + # @private New Annotation from a V1beta1::AnnotateTextResponse object. + def self.from_grpc grpc + new.tap { |a| a.instance_variable_set :@grpc, grpc } + end + + class TextSpan + attr_reader :text, :offset + alias_method :content, :text + alias_method :begin_offset, :offset + + ## + # @private Creates a new Token instance. + def initialize text, offset + @text = text + @offset = offset + end + + ## + # @private New TextSpan from a V1beta1::TextSpan object. + def self.from_grpc grpc + new grpc.content, grpc.begin_offset + end + end + + class Token + attr_reader :text_span, :part_of_speech, :head_token_index, :label, + :lemma + + ## + # @private Creates a new Token instance. + def initialize text_span, part_of_speech, head_token_index, label, + lemma + @text_span = text_span + @part_of_speech = part_of_speech + @head_token_index = head_token_index + @label = label + @lemma = lemma + end + + def text + @text_span.text + end + alias_method :content, :text + + def offset + @text_span.offset + end + alias_method :begin_offset, :offset + + ## + # @private New Token from a V1beta1::Token object. + def self.from_grpc grpc + text_span = TextSpan.from_grpc grpc.text + new text_span, grpc.part_of_speech.tag, + grpc.dependency_edge.head_token_index, + grpc.dependency_edge.label, grpc.lemma + end + end + + class Entities < DelegateClass(::Array) + attr_accessor :language + + ## + # @private Create a new Entities with an array of Entity instances. 
+ def initialize entities = [], language = nil + super entities + @language = language + end + + def unknown + select { |e| e.type == :UNKNOWN } + end + + def people + select { |e| e.type == :PERSON } + end + + def locations + select { |e| e.type == :LOCATION } + end + alias_method :places, :locations + + def organizations + select { |e| e.type == :ORGANIZATION } + end + + def events + select { |e| e.type == :EVENT } + end + + def artwork + select { |e| e.type == :WORK_OF_ART } + end + + def goods + select { |e| e.type == :CONSUMER_GOOD } + end + + def other + select { |e| e.type == :OTHER } + end + + ## + # @private New Entities from a V1beta1::AnnotateTextResponse or + # V1beta1::AnalyzeEntitiesResponse object. + def self.from_grpc grpc + entities = Array(grpc.entities).map { |g| Entity.from_grpc g } + new entities, grpc.language + end + end + + class Entity + attr_reader :name, :type, :metadata, :salience, :mentions + + ## + # @private Creates a new Entity instance. + def initialize name, type, metadata, salience, mentions + @name = name + @type = type + @metadata = metadata + @salience = salience + @mentions = mentions + end + + def wikipedia_url + metadata["wikipedia_url"] + end + + ## + # @private New Entity from a V1beta1::Entity object. + def self.from_grpc grpc + metadata = Core::GRPCUtils.map_to_hash grpc.metadata + mentions = Array(grpc.mentions).map do |g| + TextSpan.from_grpc g.text + end + new grpc.name, grpc.type, metadata, grpc.salience, mentions + end + end + + class Sentiment + attr_reader :polarity, :magnitude, :language + + ## + # @private Creates a new Sentiment instance. + def initialize polarity, magnitude, language + @polarity = polarity + @magnitude = magnitude + @language = language + end + + ## + # @private New Sentiment from a V1beta1::AnnotateTextResponse or + # V1beta1::AnalyzeSentimentResponse object. 
+ def self.from_grpc grpc + new grpc.document_sentiment.polarity, + grpc.document_sentiment.magnitude, grpc.language + end + end + end + end + end +end diff --git a/google-cloud-language/lib/google/cloud/language/credentials.rb b/google-cloud-language/lib/google/cloud/language/credentials.rb index ded9b065c0a6..4223968ef9ea 100644 --- a/google-cloud-language/lib/google/cloud/language/credentials.rb +++ b/google-cloud-language/lib/google/cloud/language/credentials.rb @@ -14,6 +14,7 @@ require "google/cloud/credentials" +require "google/cloud/language/v1beta1/language_service_api" module Google module Cloud @@ -21,10 +22,10 @@ module Language ## # @private Represents the OAuth 2.0 signing logic for Language. class Credentials < Google::Cloud::Credentials - SCOPE = ["https://www.googleapis.com/auth/language"] - PATH_ENV_VARS = %w(LANGUAGE_KEYFILE GOOGLE_CLOUD_KEYFILE GCLOUD_KEYFILE) - JSON_ENV_VARS = %w(LANGUAGE_KEYFILE_JSON GOOGLE_CLOUD_KEYFILE_JSON - GCLOUD_KEYFILE_JSON) + SCOPE = Google::Cloud::Language::V1beta1::LanguageServiceApi::ALL_SCOPES + PATH_ENV_VARS = %w(LANGUAGE_KEYFILE GCLOUD_KEYFILE GOOGLE_CLOUD_KEYFILE) + JSON_ENV_VARS = %w(LANGUAGE_KEYFILE_JSON GCLOUD_KEYFILE_JSON + GOOGLE_CLOUD_KEYFILE_JSON) end end end diff --git a/google-cloud-language/lib/google/cloud/language/document.rb b/google-cloud-language/lib/google/cloud/language/document.rb new file mode 100644 index 000000000000..11c4fe843d4b --- /dev/null +++ b/google-cloud-language/lib/google/cloud/language/document.rb @@ -0,0 +1,84 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +require "google/cloud/language/annotation" + +module Google + module Cloud + module Language + ## + # # Document + # + # Represents an document for the Language service. + # + # See {Project#document}. + # + # TODO: Overview + # + # @example + # require "google/cloud" + # + # gcloud = Google::Cloud.new + # language = gcloud.language + # + # doc = language.document "Hello world!" + # + # annotation = language.annotate doc + # annotation.thing #=> Some Result + # + class Document + ## + # @private Creates a new Document instance. + def initialize + @grpc = nil + @service = nil + end + + ## + # @private New gRPC object. + def to_grpc + @grpc + end + + ## + # @private + def self.from_grpc grpc, service + new.tap do |i| + i.instance_variable_set :@grpc, grpc + i.instance_variable_set :@service, service + end + end + + ## + # @private + def self.from_source source, service, format: nil, language: nil + source = String source + grpc = Google::Cloud::Language::V1beta1::Document.new( + content: source, type: :PLAIN_TEXT + ) + from_grpc grpc, service + end + + protected + + ## + # Raise an error unless an active language project object is available. + def ensure_service! 
+ fail "Must have active connection" unless @service + end + end + end + end +end diff --git a/google-cloud-language/lib/google/cloud/language/project.rb b/google-cloud-language/lib/google/cloud/language/project.rb index 2425ea4e8424..f45b0c1593b4 100644 --- a/google-cloud-language/lib/google/cloud/language/project.rb +++ b/google-cloud-language/lib/google/cloud/language/project.rb @@ -16,6 +16,8 @@ require "google/cloud/errors" require "google/cloud/core/gce" require "google/cloud/language/service" +require "google/cloud/language/document" +require "google/cloud/language/annotation" module Google module Cloud @@ -70,6 +72,100 @@ def self.default_project Google::Cloud::Core::GCE.project_id end + ## + # Returns a new document from the given content. + # + # TODO: Details + # + # @param [String, Google::Cloud::Storage::File] content A string of text + # to be annotated, or a Cloud Storage URI of the form + # `"gs://bucketname/path/to/document.ext"`; or an instance of + # Google::Cloud::Storage::File of the text to be annotated. + # + # @return [Document] An document for the Language service. + # + # @example + # require "google/cloud" + # + # gcloud = Google::Cloud.new + # language = gcloud.language + # + # doc = language.document "it was the best of times, it was..." + # + # @example With a Google Cloud Storage URI: + # require "google/cloud" + # + # gcloud = Google::Cloud.new + # language = gcloud.language + # + # doc = language.document "gs://bucket-name/path/to/document" + # + # @example With a Google Cloud Storage File object: + # require "google/cloud" + # + # gcloud = Google::Cloud.new + # storage = gcloud.storage + # + # bucket = storage.bucket "bucket-name" + # file = bucket.file "path/to/document" + # + # language = gcloud.language + # + # doc = language.document file + # + def document content, format: nil, language: nil + return content if content.is_a? 
Document + Document.from_source content, @service, format: format, + language: language + end + alias_method :doc, :document + + ## + # TODO: Details + # + # @param [String, Document, Google::Cloud::Storage::File] content The + # content to annotate. This can be an {Document} instance, or any + # other type that converts to an {Document}. See {#document} for + # details. + # @param [Boolean] text Whether to perform the textual analysis. + # Optional. + # @param [Boolean] entities Whether to perform the entitiy analysis. + # Optional. + # @param [Boolean] sentiment Whether to perform the sentiment analysis. + # Optional. + # @param [String] format The format of the document (TEXT/HTML). + # Optional. + # @param [String] language The language of the document (if not + # specified, the language is automatically detected). Both ISO and + # BCP-47 language codes are accepted. Optional. + # @param [String] encoding The encoding type used by the API to + # calculate offsets. Optional. + # + # @return [Annotation>] The results for the content analysis. + # + # @example + # require "google/cloud" + # + # gcloud = Google::Cloud.new + # language = gcloud.language + # + # doc = language.document "Hello world!" + # + # annotation = language.annotate doc + # annotation.thing #=> Some Result + # + def annotate content, text: false, entities: false, sentiment: false, + format: nil, language: nil, encoding: nil + ensure_service! 
+ doc = document content, language: language, format: format + grpc = service.annotate doc.to_grpc, text: text, entities: entities, + sentiment: sentiment, + encoding: encoding + Annotation.from_grpc grpc + end + alias_method :mark, :annotate + alias_method :detect, :annotate + protected ## diff --git a/google-cloud-language/lib/google/cloud/language/service.rb b/google-cloud-language/lib/google/cloud/language/service.rb index 9fce1e3846ac..acaf35a0277a 100644 --- a/google-cloud-language/lib/google/cloud/language/service.rb +++ b/google-cloud-language/lib/google/cloud/language/service.rb @@ -14,10 +14,9 @@ require "google/cloud/errors" -require "google/cloud/core/grpc_backoff" require "google/cloud/language/credentials" require "google/cloud/language/version" -# require "google/language/v1/language_services" +require "google/cloud/language/v1beta1/language_service_api" module Google module Cloud @@ -34,7 +33,7 @@ def initialize project, credentials, host: nil, retries: nil, timeout: nil @project = project @credentials = credentials - @host = host || "language.googleapis.com" + @host = host || V1beta1::LanguageServiceApi::SERVICE_ADDRESS @retries = retries @timeout = timeout end @@ -47,8 +46,13 @@ def creds def service return mocked_service if mocked_service - @service ||= Google::Language::V1::Subscriber::Stub.new( - host, creds, timeout: timeout) + @service ||= V1beta1::LanguageServiceApi.new( + service_path: host, + chan_creds: creds, + timeout: timeout, + app_name: "google-cloud-language", + app_version: Google::Cloud::Language::VERSION) + # TODO: Get retries configured end attr_accessor :mocked_service @@ -56,18 +60,47 @@ def insecure? 
credentials == :this_channel_is_insecure end + ## + # Returns API::BatchAnnotateImagesResponse + def annotate doc_grpc, text: false, entities: false, sentiment: false, + encoding: nil + if text == false && entities == false && sentiment == false + text = true + entities = true + sentiment = true + end + features = V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: text, extract_entities: entities, + extract_document_sentiment: sentiment) + encoding = verify_encoding! encoding + execute { service.annotate_text doc_grpc, features, encoding } + end + + def entities doc_grpc, encoding: nil + encoding = verify_encoding! encoding + execute { service.analyze_entities doc_grpc, encoding } + end + + def sentiment doc_grpc + execute { service.analyze_sentiment doc_grpc } + end + def inspect "#{self.class}(#{@project})" end protected + def verify_encoding! encoding + # TODO: verify encoding against V1beta1::EncodingType + return :UTF8 if encoding.nil? + encoding + end + def execute - Google::Cloud::Core::GrpcBackoff.new(retries: retries).execute do - yield - end + yield rescue GRPC::BadStatus => e - raise Error.from_error(e) + raise Google::Cloud::Error.from_error(e) end end end From 480380f3ce37a5920fdb22889f0b01894c92acdd Mon Sep 17 00:00:00 2001 From: Mike Moore Date: Tue, 16 Aug 2016 14:22:28 -0600 Subject: [PATCH 2/9] Add support for HTML content --- .../acceptance/language/html_test.rb | 273 ++++++++++++++++ .../acceptance/language/language_test.rb | 86 ----- .../acceptance/language/text_test.rb | 305 ++++++++++++++++++ .../lib/google/cloud/language/document.rb | 150 ++++++++- .../lib/google/cloud/language/project.rb | 76 +++++ 5 files changed, 803 insertions(+), 87 deletions(-) create mode 100644 google-cloud-language/acceptance/language/html_test.rb delete mode 100644 google-cloud-language/acceptance/language/language_test.rb create mode 100644 google-cloud-language/acceptance/language/text_test.rb diff --git 
a/google-cloud-language/acceptance/language/html_test.rb b/google-cloud-language/acceptance/language/html_test.rb new file mode 100644 index 000000000000..d67d7575d6fc --- /dev/null +++ b/google-cloud-language/acceptance/language/html_test.rb @@ -0,0 +1,273 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "language_helper" + +describe "Language (HTML)", :language do + let(:hello) { "Hello from Chris and Mike!" } + let(:sayhi) { "If you find yourself in Utah, come say hi!" } + let(:ruby) { "We love ruby and writing code." } + let(:content) { "#{hello}" + \ + "

#{sayhi}

#{ruby}

".encode("UTF-8") } + + describe "annotation" do + it "works without creating a document" do + annotation = language.annotate content, format: :html + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document with format and language options" do + doc = language.document content, format: :html, language: :en + doc.must_be :html? + doc.wont_be :text? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document using #html helper method" do + doc = language.html content + doc.must_be :html? + doc.wont_be :text? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "runs only the text feature" do + doc = language.document content + doc.html! + doc.must_be :html? + doc.wont_be :text? + + annotation = doc.annotate text: true + + annotation.language.must_equal "en" + + annotation.sentiment.must_be :nil? + + annotation.entities.must_be :empty? + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "runs only the sentiment feature" do + doc = language.document content + doc.format = :html + doc.language = :en + doc.must_be :html? + doc.wont_be :text? 
+ + annotation = doc.annotate sentiment: true + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.must_be :empty? + + annotation.sentences.must_be :empty? + annotation.tokens.must_be :empty? + end + + it "runs only the sentiment feature" do + doc = language.document content, format: :html + doc.must_be :html? + doc.wont_be :text? + + annotation = doc.annotate entities: true + + annotation.language.must_equal "en" + + annotation.sentiment.must_be :nil? + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.must_be :empty? + annotation.tokens.must_be :empty? 
+ end + end + + describe "entities" do + it "works without creating a document" do + entities = language.entities content, format: :html + + entities.language.must_equal "en" + + entities.count.must_equal 2 + entities.language.must_equal "en" + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["chris"] + entities.locations.map(&:name).must_equal ["utah"] + entities.places.map(&:name).must_equal ["utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + end + + it "works with creating a document" do + doc = language.document content, format: :html + doc.must_be :html? + doc.wont_be :text? + + entities = doc.entities + + entities.language.must_equal "en" + + entities.count.must_equal 2 + entities.language.must_equal "en" + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["chris"] + entities.locations.map(&:name).must_equal ["utah"] + entities.places.map(&:name).must_equal ["utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + + entities.places.first.name.must_equal "utah" + entities.places.first.type.must_equal :LOCATION + entities.places.first.metadata.must_equal({"wikipedia_url"=>"http://en.wikipedia.org/wiki/Utah"}) + entities.places.first.wikipedia_url.must_equal "http://en.wikipedia.org/wiki/Utah" + entities.places.first.salience.must_be_close_to 0.06173757091164589 + entities.places.first.mentions.count.must_equal 1 + entities.places.first.mentions.first.text.must_equal "Utah" + entities.places.first.mentions.first.offset.must_equal 102 + end + end + + describe "sentiment" do + it "works without creating a document" do + sentiment = 
language.sentiment content, format: :html + + sentiment.language.must_equal "en" + + sentiment.polarity.must_be_close_to 1.0 + sentiment.magnitude.must_be_close_to 1.899999976158142 + end + + it "works with creating a document" do + doc = language.document content, format: :html + doc.must_be :html? + doc.wont_be :text? + + sentiment = doc.sentiment + + sentiment.language.must_equal "en" + + sentiment.polarity.must_be_close_to 1.0 + sentiment.magnitude.must_be_close_to 1.899999976158142 + end + end +end diff --git a/google-cloud-language/acceptance/language/language_test.rb b/google-cloud-language/acceptance/language/language_test.rb deleted file mode 100644 index f77774e5a239..000000000000 --- a/google-cloud-language/acceptance/language/language_test.rb +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2016 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -require "language_helper" - -describe "Language", :language do - let(:hello) { "Hello from Chris and Mike!" } - let(:sayhi) { "If you find yourself in Utah, come say hi!" } - let(:ruby) { "We love ruby and writing code." 
} - let(:content) { "#{hello} #{sayhi} #{ruby}" } - - it "annotation without creating a document" do - annotation = language.annotate content - - annotation.language.must_equal "en" - - annotation.sentiment.language.must_equal "en" - annotation.sentiment.polarity.must_equal 1.0 - annotation.sentiment.magnitude.must_equal 2.0999999046325684 - - annotation.entities.count.must_equal 3 - annotation.entities.language.must_equal "en" - annotation.entities.unknown.map(&:name).must_equal [] - annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] - annotation.entities.locations.map(&:name).must_equal ["Utah"] - annotation.entities.places.map(&:name).must_equal ["Utah"] - annotation.entities.organizations.map(&:name).must_equal [] - annotation.entities.events.map(&:name).must_equal [] - annotation.entities.artwork.map(&:name).must_equal [] - annotation.entities.goods.map(&:name).must_equal [] - annotation.entities.other.map(&:name).must_equal [] - - annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] - annotation.tokens.count.must_equal 24 - token = annotation.tokens.first - token.text.must_equal "Hello" - token.part_of_speech.must_equal :X - token.head_token_index.must_equal 0 - token.label.must_equal :ROOT - token.lemma.must_equal "Hello" - end - - it "annotation with creating a document" do - doc = language.document content - - annotation = language.annotate doc - - annotation.language.must_equal "en" - - annotation.sentiment.language.must_equal "en" - annotation.sentiment.polarity.must_be_close_to 1.0 - annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 - - annotation.entities.count.must_equal 3 - annotation.entities.language.must_equal "en" - annotation.entities.unknown.map(&:name).must_equal [] - annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] - annotation.entities.locations.map(&:name).must_equal ["Utah"] - annotation.entities.places.map(&:name).must_equal ["Utah"] - 
annotation.entities.organizations.map(&:name).must_equal [] - annotation.entities.events.map(&:name).must_equal [] - annotation.entities.artwork.map(&:name).must_equal [] - annotation.entities.goods.map(&:name).must_equal [] - annotation.entities.other.map(&:name).must_equal [] - - annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] - annotation.tokens.count.must_equal 24 - token = annotation.tokens.first - token.text.must_equal "Hello" - token.part_of_speech.must_equal :X - token.head_token_index.must_equal 0 - token.label.must_equal :ROOT - token.lemma.must_equal "Hello" - end -end diff --git a/google-cloud-language/acceptance/language/text_test.rb b/google-cloud-language/acceptance/language/text_test.rb new file mode 100644 index 000000000000..71619ad5d98d --- /dev/null +++ b/google-cloud-language/acceptance/language/text_test.rb @@ -0,0 +1,305 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "language_helper" + +describe "Language (TEXT)", :language do + let(:hello) { "Hello from Chris and Mike!" } + let(:sayhi) { "If you find yourself in Utah, come say hi!" } + let(:ruby) { "We love ruby and writing code." 
} + let(:content) { "#{hello} #{sayhi} #{ruby}" } + + describe "annotation" do + it "works without creating a document" do + annotation = language.annotate content, format: :text + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_equal 1.0 + annotation.sentiment.magnitude.must_equal 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document" do + doc = language.document content + doc.must_be :text? + doc.wont_be :html? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document with format and language options" do + doc = language.document content, format: :text, language: :en + doc.must_be :text? + doc.wont_be :html? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document using #text helper method" do + doc = language.text content + doc.must_be :text? + doc.wont_be :html? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "runs only the text feature" do + doc = language.document content, format: :text + doc.text! + doc.must_be :text? + doc.wont_be :html? + + annotation = doc.annotate text: true + + annotation.language.must_equal "en" + + annotation.sentiment.must_be :nil? + + annotation.entities.must_be :empty? + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "runs only the sentiment feature" do + doc = language.document content + doc.format = :text + doc.language = :en + doc.must_be :text? + doc.wont_be :html? 
+ + annotation = doc.annotate sentiment: true + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.must_be :empty? + + annotation.sentences.must_be :empty? + annotation.tokens.must_be :empty? + end + + it "runs only the sentiment feature" do + doc = language.document content, format: :text + doc.must_be :text? + doc.wont_be :html? + + annotation = doc.annotate entities: true + + annotation.language.must_equal "en" + + annotation.sentiment.must_be :nil? + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.must_be :empty? + annotation.tokens.must_be :empty? 
+ end + end + + describe "entities" do + it "works without creating a document" do + entities = language.entities content, format: :text + + entities.language.must_equal "en" + + entities.count.must_equal 3 + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["Chris", "Mike"] + entities.locations.map(&:name).must_equal ["Utah"] + entities.places.map(&:name).must_equal ["Utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + end + + it "works with creating a document" do + doc = language.document content + doc.must_be :text? + doc.wont_be :html? + + entities = doc.entities + + entities.language.must_equal "en" + + entities.count.must_equal 3 + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["Chris", "Mike"] + entities.locations.map(&:name).must_equal ["Utah"] + entities.places.map(&:name).must_equal ["Utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + + entities.places.first.name.must_equal "Utah" + entities.places.first.type.must_equal :LOCATION + entities.places.first.metadata.must_equal({"wikipedia_url"=>"http://en.wikipedia.org/wiki/Utah"}) + entities.places.first.wikipedia_url.must_equal "http://en.wikipedia.org/wiki/Utah" + entities.places.first.salience.must_be_close_to 0.06979143619537354 + entities.places.first.mentions.count.must_equal 1 + entities.places.first.mentions.first.text.must_equal "Utah" + entities.places.first.mentions.first.offset.must_equal 51 + end + end + + describe "sentiment" do + it "works without creating a document" do + sentiment = language.sentiment content, format: :text + + sentiment.language.must_equal 
"en" + + sentiment.polarity.must_be_close_to 1.0 + sentiment.magnitude.must_be_close_to 2.0999999046325684 + end + + it "works with creating a document" do + doc = language.document content + doc.must_be :text? + doc.wont_be :html? + + sentiment = doc.sentiment + + sentiment.language.must_equal "en" + + sentiment.polarity.must_be_close_to 1.0 + sentiment.magnitude.must_be_close_to 2.0999999046325684 + end + end +end diff --git a/google-cloud-language/lib/google/cloud/language/document.rb b/google-cloud-language/lib/google/cloud/language/document.rb index 11c4fe843d4b..24b368ccbbe7 100644 --- a/google-cloud-language/lib/google/cloud/language/document.rb +++ b/google-cloud-language/lib/google/cloud/language/document.rb @@ -46,6 +46,148 @@ def initialize @service = nil end + ## + # The Document's format. `:text` or `:html` + # + def format + return :text if text? + return :html if html? + end + + def format= new_format + @grpc.type = :PLAIN_TEXT if new_format.to_s == "text" + @grpc.type = :HTML if new_format.to_s == "html" + @grpc.type + end + + ## + # Whether the Document is the TEXT format. + # + def text? + @grpc.type == :PLAIN_TEXT + end + + ## + # Sets the Document to the TEXT format. + # + def text! + @grpc.type = :PLAIN_TEXT + end + + ## + # Whether the Document is the HTML format. + # + def html? + @grpc.type == :HTML + end + + ## + # Sets the Document to the HTML format. + # + def html! + @grpc.type = :HTML + end + + ## + # The Document's language. + # + def language + @grpc.language + end + + ## + # The Document's language. + # + def language= new_language + new_language = new_language.to_s unless new_language.nil? + @grpc.language = new_language + end + + ## + # TODO: Details + # + # @param [Boolean] text Whether to perform the textual analysis. + # Optional. + # @param [Boolean] entities Whether to perform the entitiy analysis. + # Optional. + # @param [Boolean] sentiment Whether to perform the sentiment analysis. + # Optional. 
+ # @param [String] encoding The encoding type used by the API to + # calculate offsets. Optional. + # + # @return [Annotation] The results for the content analysis. + # + # @example + # require "google/cloud" + # + # gcloud = Google::Cloud.new + # language = gcloud.language + # + # doc = language.document "Hello world!" + # + # annotation = doc.annotate + # annotation.thing #=> Some Result + # + def annotate text: false, entities: false, sentiment: false, + encoding: nil + ensure_service! + grpc = @service.annotate to_grpc, text: text, entities: entities, + sentiment: sentiment, + encoding: encoding + Annotation.from_grpc grpc + end + alias_method :mark, :annotate + alias_method :detect, :annotate + + ## + # TODO: Details + # + # @param [String] encoding The encoding type used by the API to + # calculate offsets. Optional. + # + # @return [Annotation::Entities] The results for the entities analysis. + # + # @example + # require "google/cloud" + # + # gcloud = Google::Cloud.new + # language = gcloud.language + # + # doc = language.document "Hello Chris and Mike!" + # + # entities = doc.entities + # entities.count #=> 2 + # + def entities encoding: nil + ensure_service! + grpc = @service.entities to_grpc, encoding: encoding + Annotation::Entities.from_grpc grpc + end + + ## + # TODO: Details + # + # @return [Annotation::Sentiment] The results for the sentiment + # analysis. + # + # @example + # require "google/cloud" + # + # gcloud = Google::Cloud.new + # language = gcloud.language + # + # doc = language.document "Hello Chris and Mike!" + # + # sentiment = doc.sentiment + # sentiment.polarity #=> 1.0 + # sentiment.magnitude #=> 0.8999999761581421 + # + def sentiment + ensure_service! + grpc = @service.sentiment to_grpc + Annotation::Sentiment.from_grpc grpc + end + ## # @private New gRPC object. 
def to_grpc @@ -66,8 +208,14 @@ def self.from_grpc grpc, service def self.from_source source, service, format: nil, language: nil source = String source grpc = Google::Cloud::Language::V1beta1::Document.new( - content: source, type: :PLAIN_TEXT + content: source ) + if format.to_s == "html" + grpc.type = :HTML + else + grpc.type = :PLAIN_TEXT + end + grpc.language = language.to_s unless language.nil? from_grpc grpc, service end diff --git a/google-cloud-language/lib/google/cloud/language/project.rb b/google-cloud-language/lib/google/cloud/language/project.rb index f45b0c1593b4..1f6ef9d39648 100644 --- a/google-cloud-language/lib/google/cloud/language/project.rb +++ b/google-cloud-language/lib/google/cloud/language/project.rb @@ -120,6 +120,14 @@ def document content, format: nil, language: nil end alias_method :doc, :document + def text content, language: nil + document content, format: :text, language: language + end + + def html content, language: nil + document content, format: :html, language: language + end + ## # TODO: Details # @@ -166,6 +174,74 @@ def annotate content, text: false, entities: false, sentiment: false, alias_method :mark, :annotate alias_method :detect, :annotate + ## + # TODO: Details + # + # @param [String, Document] content The content to annotate. This + # can be a {Document} instance, or any other type that converts to a + # {Document}. See {#document} for details. + # @param [String] format The format of the document (TEXT/HTML). + # Optional. + # @param [String] language The language of the document (if not + # specified, the language is automatically detected). Both ISO and + # BCP-47 language codes are accepted. Optional. + # @param [String] encoding The encoding type used by the API to + # calculate offsets. Optional. + # + # @return [Annotation::Entities] The results for the entities analysis. 
+ # + # @example + # require "google/cloud" + # + # gcloud = Google::Cloud.new + # language = gcloud.language + # + # doc = language.document "Hello Chris and Mike!" + # + # entities = language.entities doc + # entities.count #=> 2 + # + def entities content, format: :text, language: nil, encoding: nil + ensure_service! + doc = document content, language: language, format: format + grpc = service.entities doc.to_grpc, encoding: encoding + Annotation::Entities.from_grpc grpc + end + + ## + # TODO: Details + # + # @param [String, Document] content The content to annotate. This + # can be a {Document} instance, or any other type that converts to a + # {Document}. See {#document} for details. + # @param [String] format The format of the document (TEXT/HTML). + # Optional. + # @param [String] language The language of the document (if not + # specified, the language is automatically detected). Both ISO and + # BCP-47 language codes are accepted. Optional. + # + # @return [Annotation::Sentiment] The results for the sentiment + # analysis. + # + # @example + # require "google/cloud" + # + # gcloud = Google::Cloud.new + # language = gcloud.language + # + # doc = language.document "Hello Chris and Mike!" + # + # sentiment = language.sentiment doc + # sentiment.polarity #=> 1.0 + # sentiment.magnitude #=> 0.8999999761581421 + # + def sentiment content, format: :text, language: nil + ensure_service! + doc = document content, language: language, format: format + grpc = service.sentiment doc.to_grpc + Annotation::Sentiment.from_grpc grpc + end + protected ## From 3ecf81e5d8afba451882285a023bdb3098ff8caa Mon Sep 17 00:00:00 2001 From: Mike Moore Date: Fri, 26 Aug 2016 10:25:53 -0600 Subject: [PATCH 3/9] Add support for Storage URLs Allow Document to know if it contains content or a URL. 
--- google-cloud-language/Gemfile | 1 + .../language/storage/html_file_test.rb | 313 +++++++++++++++++ .../language/storage/html_url_test.rb | 314 ++++++++++++++++++ .../language/storage/text_file_test.rb | 309 +++++++++++++++++ .../language/storage/text_url_test.rb | 310 +++++++++++++++++ .../acceptance/language_helper.rb | 29 ++ .../lib/google/cloud/language/document.rb | 32 +- .../lib/google/cloud/language/project.rb | 13 +- 8 files changed, 1315 insertions(+), 6 deletions(-) create mode 100644 google-cloud-language/acceptance/language/storage/html_file_test.rb create mode 100644 google-cloud-language/acceptance/language/storage/html_url_test.rb create mode 100644 google-cloud-language/acceptance/language/storage/text_file_test.rb create mode 100644 google-cloud-language/acceptance/language/storage/text_url_test.rb diff --git a/google-cloud-language/Gemfile b/google-cloud-language/Gemfile index a2c298e5a5fe..edc9a3445867 100644 --- a/google-cloud-language/Gemfile +++ b/google-cloud-language/Gemfile @@ -4,6 +4,7 @@ gemspec gem "rake" gem "google-cloud-core", path: "../google-cloud-core" +gem "google-cloud-storage", path: "../google-cloud-storage" gem "gcloud-jsondoc", git: "https://github.com/GoogleCloudPlatform/gcloud-ruby.git", branch: "gcloud-jsondoc" diff --git a/google-cloud-language/acceptance/language/storage/html_file_test.rb b/google-cloud-language/acceptance/language/storage/html_file_test.rb new file mode 100644 index 000000000000..2419d96cb8c7 --- /dev/null +++ b/google-cloud-language/acceptance/language/storage/html_file_test.rb @@ -0,0 +1,313 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "language_helper" + +describe "Language (HTML/Storage File)", :language do + let(:hello) { "Hello from Chris and Mike!" } + let(:sayhi) { "If you find yourself in Utah, come say hi!" } + let(:ruby) { "We love ruby and writing code." } + let(:content) { "#{hello}" + \ + "

#{sayhi}

#{ruby}

" } + + let(:storage) { Google::Cloud.storage } + let(:bucket) { storage.bucket($lang_prefix) || storage.create_bucket($lang_prefix) } + let(:file_io) { t = Tempfile.new(["language", ".html"]); t.write content.encode("UTF-8"); t.rewind; t } + let(:file) { bucket.file("language.html") || bucket.create_file(file_io, "language.html") } + let(:url) { file.to_gs_url } + + describe "annotation" do + it "works without creating a document" do + annotation = language.annotate file, format: :html + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document" do + doc = language.document file + doc.must_be :html? + doc.wont_be :text? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document with format and language options" do + doc = language.document file, format: :html, language: :en + doc.must_be :html? + doc.wont_be :text? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document using #text helper method" do + doc = language.html file + doc.must_be :html? + doc.wont_be :text? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "runs only the text feature" do + doc = language.document file, format: :text + doc.html! + doc.must_be :html? + doc.wont_be :text? + + annotation = doc.annotate text: true + + annotation.language.must_equal "en" + + annotation.sentiment.must_be :nil? + + annotation.entities.must_be :empty? + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "runs only the sentiment feature" do + doc = language.document file, format: :text + doc.format = :html + doc.must_be :html? + doc.wont_be :text? 
+ + annotation = doc.annotate sentiment: true + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.must_be :empty? + + annotation.sentences.must_be :empty? + annotation.tokens.must_be :empty? + end + + it "runs only the sentiment feature" do + doc = language.document file, format: :html + doc.must_be :html? + doc.wont_be :text? + + annotation = doc.annotate entities: true + + annotation.language.must_equal "en" + + annotation.sentiment.must_be :nil? + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.must_be :empty? + annotation.tokens.must_be :empty? 
+ end + end + + describe "entities" do + it "works without creating a document" do + entities = language.entities file, format: :html + + entities.language.must_equal "en" + + entities.count.must_equal 2 + entities.language.must_equal "en" + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["chris"] + entities.locations.map(&:name).must_equal ["utah"] + entities.places.map(&:name).must_equal ["utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + end + + it "works with creating a document" do + doc = language.document file + doc.must_be :html? + doc.wont_be :text? + + entities = doc.entities + + entities.language.must_equal "en" + + entities.count.must_equal 2 + entities.language.must_equal "en" + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["chris"] + entities.locations.map(&:name).must_equal ["utah"] + entities.places.map(&:name).must_equal ["utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + + entities.places.first.name.must_equal "utah" + entities.places.first.type.must_equal :LOCATION + entities.places.first.metadata.must_equal({"wikipedia_url"=>"http://en.wikipedia.org/wiki/Utah"}) + entities.places.first.wikipedia_url.must_equal "http://en.wikipedia.org/wiki/Utah" + entities.places.first.salience.must_be_close_to 0.06173757091164589 + entities.places.first.mentions.count.must_equal 1 + entities.places.first.mentions.first.text.must_equal "Utah" + entities.places.first.mentions.first.offset.must_equal 102 + end + end + + describe "sentiment" do + it "works without creating a document" do + sentiment = language.sentiment file, 
format: :html + + sentiment.language.must_equal "en" + + sentiment.polarity.must_be_close_to 1.0 + sentiment.magnitude.must_be_close_to 1.899999976158142 + end + + it "works with creating a document" do + doc = language.document file + doc.must_be :html? + doc.wont_be :text? + + sentiment = doc.sentiment + + sentiment.language.must_equal "en" + + sentiment.polarity.must_be_close_to 1.0 + sentiment.magnitude.must_be_close_to 1.899999976158142 + end + end +end diff --git a/google-cloud-language/acceptance/language/storage/html_url_test.rb b/google-cloud-language/acceptance/language/storage/html_url_test.rb new file mode 100644 index 000000000000..06e6f04b212c --- /dev/null +++ b/google-cloud-language/acceptance/language/storage/html_url_test.rb @@ -0,0 +1,314 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "language_helper" + +describe "Language (HTML/Storage URL)", :language do + let(:hello) { "Hello from Chris and Mike!" } + let(:sayhi) { "If you find yourself in Utah, come say hi!" } + let(:ruby) { "We love ruby and writing code." } + let(:content) { "#{hello}" + \ + "

#{sayhi}

#{ruby}

" } + + let(:storage) { Google::Cloud.storage } + let(:bucket) { storage.bucket($lang_prefix) || storage.create_bucket($lang_prefix) } + let(:file_io) { t = Tempfile.new(["language", ".html"]); t.write content.encode("UTF-8"); t.rewind; t } + let(:file) { bucket.file("language.html") || bucket.create_file(file_io, "language.html") } + let(:url) { file.to_gs_url } + + describe "annotation" do + it "works without creating a document" do + annotation = language.annotate url, format: :html + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document" do + doc = language.document url + doc.must_be :html? + doc.wont_be :text? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document with format and language options" do + doc = language.document url, format: :html, language: :en + doc.must_be :html? + doc.wont_be :text? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document using #html helper method" do + doc = language.html url + doc.must_be :html? + doc.wont_be :text? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "runs only the text feature" do + doc = language.document url, format: :text + doc.html! + doc.must_be :html? + doc.wont_be :text? + + annotation = doc.annotate text: true + + annotation.language.must_equal "en" + + annotation.sentiment.must_be :nil? + + annotation.entities.must_be :empty? + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "runs only the sentiment feature" do + doc = language.document url, format: "text" + doc.format = :html + doc.must_be :html? + doc.wont_be :text? 
+ + annotation = doc.annotate sentiment: true + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.must_be :empty? + + annotation.sentences.must_be :empty? + annotation.tokens.must_be :empty? + end + + it "runs only the entities feature" do + doc = language.document url, format: :html + doc.must_be :html? + doc.wont_be :text? + + annotation = doc.annotate entities: true + + annotation.language.must_equal "en" + + annotation.sentiment.must_be :nil? + + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.must_be :empty? + annotation.tokens.must_be :empty? 
+ end + end + + describe "entities" do + + it "works without creating a document" do + entities = language.entities url, format: :html, language: "en" + + entities.language.must_equal "en" + + entities.count.must_equal 2 + entities.language.must_equal "en" + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["chris"] + entities.locations.map(&:name).must_equal ["utah"] + entities.places.map(&:name).must_equal ["utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + end + + it "works with creating a document" do + doc = language.document url, language: "en" + doc.must_be :html? + doc.wont_be :text? + + entities = doc.entities encoding: :UTF8 + + entities.language.must_equal "en" + + entities.count.must_equal 2 + entities.language.must_equal "en" + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["chris"] + entities.locations.map(&:name).must_equal ["utah"] + entities.places.map(&:name).must_equal ["utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + + entities.places.first.name.must_equal "utah" + entities.places.first.type.must_equal :LOCATION + entities.places.first.metadata.must_equal({"wikipedia_url"=>"http://en.wikipedia.org/wiki/Utah"}) + entities.places.first.wikipedia_url.must_equal "http://en.wikipedia.org/wiki/Utah" + entities.places.first.salience.must_be_close_to 0.06173757091164589 + entities.places.first.mentions.count.must_equal 1 + entities.places.first.mentions.first.text.must_equal "Utah" + entities.places.first.mentions.first.offset.must_equal 102 + end + end + + describe "sentiment" do + it "works without creating a 
document" do + sentiment = language.sentiment url, format: :html + + sentiment.language.must_equal "en" + + sentiment.polarity.must_be_close_to 1.0 + sentiment.magnitude.must_be_close_to 1.899999976158142 + end + + it "works with creating a document" do + doc = language.document url + doc.must_be :html? + doc.wont_be :text? + + sentiment = doc.sentiment + + sentiment.language.must_equal "en" + + sentiment.polarity.must_be_close_to 1.0 + sentiment.magnitude.must_be_close_to 1.899999976158142 + end + end +end diff --git a/google-cloud-language/acceptance/language/storage/text_file_test.rb b/google-cloud-language/acceptance/language/storage/text_file_test.rb new file mode 100644 index 000000000000..03b5eaca966c --- /dev/null +++ b/google-cloud-language/acceptance/language/storage/text_file_test.rb @@ -0,0 +1,309 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "language_helper" + +describe "Language (TEXT/Storage File)", :language do + let(:hello) { "Hello from Chris and Mike!" } + let(:sayhi) { "If you find yourself in Utah, come say hi!" } + let(:ruby) { "We love ruby and writing code." 
} + let(:content) { "#{hello} #{sayhi} #{ruby}" } + + let(:storage) { Google::Cloud.storage } + let(:bucket) { storage.bucket($lang_prefix) || storage.create_bucket($lang_prefix) } + let(:file_io) { t = Tempfile.new(["language", ".txt"]); t.write content.encode("UTF-8"); t.rewind; t } + let(:file) { bucket.file("language.txt") || bucket.create_file(file_io, "language.txt") } + + describe "annotation" do + it "works without creating a document" do + annotation = language.annotate file, format: :text + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document" do + doc = language.document file + doc.must_be :text? + doc.wont_be :html? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document with format and language options" do + doc = language.document file, format: :text, language: :en + doc.must_be :text? + doc.wont_be :html? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document using #text helper method" do + doc = language.text file + doc.must_be :text? + doc.wont_be :html? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "runs only the text feature" do + doc = language.document file, format: :text + doc.text! + doc.must_be :text? + doc.wont_be :html? + + annotation = doc.annotate text: true + + annotation.language.must_equal "en" + + annotation.sentiment.must_be :nil? + + annotation.entities.must_be :empty? + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "runs only the sentiment feature" do + doc = language.document file + doc.format = :text + doc.must_be :text? + doc.wont_be :html? 
+ + annotation = doc.annotate sentiment: true + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.must_be :empty? + + annotation.sentences.must_be :empty? + annotation.tokens.must_be :empty? + end + + it "runs only the entities feature" do + doc = language.document file, format: :text + doc.must_be :text? + doc.wont_be :html? + + annotation = doc.annotate entities: true + + annotation.language.must_equal "en" + + annotation.sentiment.must_be :nil? + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.must_be :empty? + annotation.tokens.must_be :empty? 
+ end + end + + describe "entities" do + it "works without creating a document" do + entities = language.entities file, format: :text + + entities.language.must_equal "en" + + entities.count.must_equal 3 + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["Chris", "Mike"] + entities.locations.map(&:name).must_equal ["Utah"] + entities.places.map(&:name).must_equal ["Utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + end + + it "works with creating a document" do + doc = language.document file + doc.must_be :text? + doc.wont_be :html? + + entities = doc.entities + + entities.language.must_equal "en" + + entities.count.must_equal 3 + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["Chris", "Mike"] + entities.locations.map(&:name).must_equal ["Utah"] + entities.places.map(&:name).must_equal ["Utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + + entities.places.first.name.must_equal "Utah" + entities.places.first.type.must_equal :LOCATION + entities.places.first.metadata.must_equal({"wikipedia_url"=>"http://en.wikipedia.org/wiki/Utah"}) + entities.places.first.wikipedia_url.must_equal "http://en.wikipedia.org/wiki/Utah" + entities.places.first.salience.must_be_close_to 0.06979143619537354 + entities.places.first.mentions.count.must_equal 1 + entities.places.first.mentions.first.text.must_equal "Utah" + entities.places.first.mentions.first.offset.must_equal 51 + end + end + + describe "sentiment" do + it "works without creating a document" do + sentiment = language.sentiment file, format: :text + + sentiment.language.must_equal "en" + + 
sentiment.polarity.must_be_close_to 1.0 + sentiment.magnitude.must_be_close_to 2.0999999046325684 + end + + it "works with creating a document" do + doc = language.document file + doc.must_be :text? + doc.wont_be :html? + + sentiment = doc.sentiment + + sentiment.language.must_equal "en" + + sentiment.polarity.must_be_close_to 1.0 + sentiment.magnitude.must_be_close_to 2.0999999046325684 + end + end +end diff --git a/google-cloud-language/acceptance/language/storage/text_url_test.rb b/google-cloud-language/acceptance/language/storage/text_url_test.rb new file mode 100644 index 000000000000..b0888a2273c6 --- /dev/null +++ b/google-cloud-language/acceptance/language/storage/text_url_test.rb @@ -0,0 +1,310 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "language_helper" + +describe "Language (TEXT/Storage URL)", :language do + let(:hello) { "Hello from Chris and Mike!" } + let(:sayhi) { "If you find yourself in Utah, come say hi!" } + let(:ruby) { "We love ruby and writing code." 
} + let(:content) { "#{hello} #{sayhi} #{ruby}" } + + let(:storage) { Google::Cloud.storage } + let(:bucket) { storage.bucket($lang_prefix) || storage.create_bucket($lang_prefix) } + let(:file_io) { t = Tempfile.new(["language", ".txt"]); t.write content.encode("UTF-8"); t.rewind; t } + let(:file) { bucket.file("language.txt") || bucket.create_file(file_io, "language.txt") } + let(:url) { file.to_gs_url } + + describe "annotation" do + it "works without creating a document" do + annotation = language.annotate url, format: :text + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document" do + doc = language.document url + doc.must_be :text? + doc.wont_be :html? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document with format and language options" do + doc = language.document url, format: :text, language: :en + doc.must_be :text? + doc.wont_be :html? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "works with creating a document using #text helper method" do + doc = language.text url + doc.must_be :text? + doc.wont_be :html? 
+ + annotation = language.annotate doc + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "runs only the text feature" do + doc = language.document url, format: :text + doc.text! + doc.must_be :text? + doc.wont_be :html? + + annotation = doc.annotate text: true + + annotation.language.must_equal "en" + + annotation.sentiment.must_be :nil? + + annotation.entities.must_be :empty? + + annotation.sentences.map(&:text).must_equal [hello, sayhi, ruby] + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "runs only the sentiment feature" do + doc = language.document url + doc.format = :text + doc.must_be :text? + doc.wont_be :html? 
+ + annotation = doc.annotate sentiment: true + + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 2.0999999046325684 + + annotation.entities.must_be :empty? + + annotation.sentences.must_be :empty? + annotation.tokens.must_be :empty? + end + + it "runs only the entities feature" do + doc = language.document url, format: :text + doc.must_be :text? + doc.wont_be :html? + + annotation = doc.annotate entities: true + + annotation.language.must_equal "en" + + annotation.sentiment.must_be :nil? + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.must_be :empty? + annotation.tokens.must_be :empty?
+ end + end + + describe "entities" do + it "works without creating a document" do + entities = language.entities url, format: :text + + entities.language.must_equal "en" + + entities.count.must_equal 3 + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["Chris", "Mike"] + entities.locations.map(&:name).must_equal ["Utah"] + entities.places.map(&:name).must_equal ["Utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + end + + it "works with creating a document" do + doc = language.document url + doc.must_be :text? + doc.wont_be :html? + + entities = doc.entities + + entities.language.must_equal "en" + + entities.count.must_equal 3 + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["Chris", "Mike"] + entities.locations.map(&:name).must_equal ["Utah"] + entities.places.map(&:name).must_equal ["Utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + + entities.places.first.name.must_equal "Utah" + entities.places.first.type.must_equal :LOCATION + entities.places.first.metadata.must_equal({"wikipedia_url"=>"http://en.wikipedia.org/wiki/Utah"}) + entities.places.first.wikipedia_url.must_equal "http://en.wikipedia.org/wiki/Utah" + entities.places.first.salience.must_be_close_to 0.06979143619537354 + entities.places.first.mentions.count.must_equal 1 + entities.places.first.mentions.first.text.must_equal "Utah" + entities.places.first.mentions.first.offset.must_equal 51 + end + end + + describe "sentiment" do + it "works without creating a document" do + sentiment = language.sentiment url, format: :text + + sentiment.language.must_equal "en" + + 
sentiment.polarity.must_be_close_to 1.0 + sentiment.magnitude.must_be_close_to 2.0999999046325684 + end + + it "works with creating a document" do + doc = language.document url + doc.must_be :text? + doc.wont_be :html? + + sentiment = doc.sentiment + + sentiment.language.must_equal "en" + + sentiment.polarity.must_be_close_to 1.0 + sentiment.magnitude.must_be_close_to 2.0999999046325684 + end + end +end diff --git a/google-cloud-language/acceptance/language_helper.rb b/google-cloud-language/acceptance/language_helper.rb index 84d776ee72c6..4fb7484c297e 100644 --- a/google-cloud-language/acceptance/language_helper.rb +++ b/google-cloud-language/acceptance/language_helper.rb @@ -21,6 +21,11 @@ # Create shared language object so we don't create new for each test $language = Google::Cloud.language retries: 10 +require "google/cloud/storage" + +# Create shared storage object so we don't create new for each test +$storage = Google::Cloud.new.storage retries: 10 + module Acceptance ## # Test class for running against a Language instance. @@ -44,6 +49,10 @@ def setup refute_nil @language, "You do not have an active language to run the tests." + @storage = $storage + + refute_nil @storage, "You do not have an active storage to run the tests." + super end @@ -66,3 +75,23 @@ def self.run_one_method klass, method_name, reporter reporter.record result end end + +# Create buckets to be shared with all the tests +require "time" +require "securerandom" +t = Time.now.utc.iso8601.gsub ":", "-" +$lang_prefix = "gcloud-language-acceptance-#{t}-#{SecureRandom.hex(4)}".downcase + +def clean_up_language_storage_objects + puts "Cleaning up storage buckets after language tests." 
+ if b = $storage.bucket($lang_prefix) + b.files.all(&:delete) + b.delete + end +rescue => e + puts "Error while cleaning up storage buckets after language tests.\n\n#{e}" +end + +Minitest.after_run do + clean_up_language_storage_objects +end diff --git a/google-cloud-language/lib/google/cloud/language/document.rb b/google-cloud-language/lib/google/cloud/language/document.rb index 24b368ccbbe7..b14719b02110 100644 --- a/google-cloud-language/lib/google/cloud/language/document.rb +++ b/google-cloud-language/lib/google/cloud/language/document.rb @@ -46,6 +46,28 @@ def initialize @service = nil end + ## + # @private Whether the Document has content. + # + def content? + @grpc.source == :content + end + + ## + # @private Whether the Document is a URL. + # + def url? + @grpc.source == :gcs_content_uri + end + + ## + # @private The source of the content + # + def source + return @grpc.content if content? + @grpc.gcs_content_uri + end + ## # The Document's format. `:text` or `:html` # @@ -207,9 +229,13 @@ def self.from_grpc grpc, service # @private def self.from_source source, service, format: nil, language: nil source = String source - grpc = Google::Cloud::Language::V1beta1::Document.new( - content: source - ) + grpc = Google::Cloud::Language::V1beta1::Document.new + if source.start_with? "gs://" + grpc.gcs_content_uri = source + format ||= :html if source.end_with? ".html" + else + grpc.content = source + end if format.to_s == "html" grpc.type = :HTML else diff --git a/google-cloud-language/lib/google/cloud/language/project.rb b/google-cloud-language/lib/google/cloud/language/project.rb index 1f6ef9d39648..533689ee43da 100644 --- a/google-cloud-language/lib/google/cloud/language/project.rb +++ b/google-cloud-language/lib/google/cloud/language/project.rb @@ -114,9 +114,16 @@ def self.default_project # doc = language.document file # def document content, format: nil, language: nil - return content if content.is_a? 
Document - Document.from_source content, @service, format: format, - language: language + content = content.to_gs_url if content.respond_to? :to_gs_url + if content.is_a? Document + # Create new document with the provided format and language + Document.from_source content.source, @service, + format: (format || content.format), + language: (language || content.language) + else + Document.from_source content, @service, format: format, + language: language + end end alias_method :doc, :document From 1251517eb870f771fffcd94f36a93953d2aedfb2 Mon Sep 17 00:00:00 2001 From: Mike Moore Date: Tue, 23 Aug 2016 08:29:15 -0600 Subject: [PATCH 4/9] Add Language Entity type helper methods Also add some unit test coverage for Entity and Entities. --- .../lib/google/cloud/language/annotation.rb | 49 ++++++++-- .../language/annotation/entities_test.rb | 90 +++++++++++++++++++ .../cloud/language/annotation/entity_test.rb | 56 ++++++++++++ 3 files changed, 187 insertions(+), 8 deletions(-) create mode 100644 google-cloud-language/test/google/cloud/language/annotation/entities_test.rb create mode 100644 google-cloud-language/test/google/cloud/language/annotation/entity_test.rb diff --git a/google-cloud-language/lib/google/cloud/language/annotation.rb b/google-cloud-language/lib/google/cloud/language/annotation.rb index 563740dfd5da..e54102c7b228 100644 --- a/google-cloud-language/lib/google/cloud/language/annotation.rb +++ b/google-cloud-language/lib/google/cloud/language/annotation.rb @@ -141,36 +141,36 @@ def initialize entities = [], language = nil end def unknown - select { |e| e.type == :UNKNOWN } + select(&:unknown?) end def people - select { |e| e.type == :PERSON } + select(&:person?) end def locations - select { |e| e.type == :LOCATION } + select(&:location?) end alias_method :places, :locations def organizations - select { |e| e.type == :ORGANIZATION } + select(&:organization?) end def events - select { |e| e.type == :EVENT } + select(&:event?) 
end def artwork - select { |e| e.type == :WORK_OF_ART } + select(&:artwork?) end def goods - select { |e| e.type == :CONSUMER_GOOD } + select(&:good?) end def other - select { |e| e.type == :OTHER } + select(&:other?) end ## @@ -195,6 +195,39 @@ def initialize name, type, metadata, salience, mentions @mentions = mentions end + def unknown? + type == :UNKNOWN + end + + def person? + type == :PERSON + end + + def location? + type == :LOCATION + end + alias_method :place?, :location? + + def organization? + type == :ORGANIZATION + end + + def event? + type == :EVENT + end + + def artwork? + type == :WORK_OF_ART + end + + def good? + type == :CONSUMER_GOOD + end + + def other? + type == :OTHER + end + def wikipedia_url metadata["wikipedia_url"] end diff --git a/google-cloud-language/test/google/cloud/language/annotation/entities_test.rb b/google-cloud-language/test/google/cloud/language/annotation/entities_test.rb new file mode 100644 index 000000000000..a1f25d599d35 --- /dev/null +++ b/google-cloud-language/test/google/cloud/language/annotation/entities_test.rb @@ -0,0 +1,90 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +require "helper" + +describe Google::Cloud::Language::Annotation::Entities do + let(:entity_json) do + %{ + { + "entities": [{ + "name": "Chris", + "type": "PERSON", + "metadata": {}, + "salience": 0.5138337, + "mentions": [{ + "text": { + "content": "Chris", + "beginOffset": -1 + } + }] + }, { + "name": "Mike", + "type": "PERSON", + "metadata": {}, + "salience": 0.1997266, + "mentions": [{ + "text": { + "content": "Mike", + "beginOffset": -1 + } + }] + }, { + "name": "Utah", + "type": "LOCATION", + "metadata": { + "wikipedia_url": "http://en.wikipedia.org/wiki/Utah" + }, + "salience": 0.069791436, + "mentions": [{ + "text": { + "content": "Utah", + "beginOffset": -1 + } + }] + }], + "language": "en" + } + } + end + let(:entities_grpc) { Google::Cloud::Language::V1beta1::AnalyzeEntitiesResponse.decode_json entity_json } + let(:entities) { Google::Cloud::Language::Annotation::Entities.from_grpc entities_grpc } + + it "has attributes" do + entities.language.must_equal "en" + + entities.must_be_kind_of ::Array # Because it's a DelegateClass(::Array) + entities.class.must_equal Google::Cloud::Language::Annotation::Entities + entities.count.must_equal 3 + entities.unknown.map(&:name).must_equal [] + entities.people.map(&:name).must_equal ["Chris", "Mike"] + entities.locations.map(&:name).must_equal ["Utah"] + entities.places.map(&:name).must_equal ["Utah"] + entities.organizations.map(&:name).must_equal [] + entities.events.map(&:name).must_equal [] + entities.artwork.map(&:name).must_equal [] + entities.goods.map(&:name).must_equal [] + entities.other.map(&:name).must_equal [] + + entities.places.first.must_be_kind_of Google::Cloud::Language::Annotation::Entity + entities.places.first.name.must_equal "Utah" + entities.places.first.type.must_equal :LOCATION + entities.places.first.metadata.must_equal({"wikipedia_url"=>"http://en.wikipedia.org/wiki/Utah"}) + entities.places.first.wikipedia_url.must_equal "http://en.wikipedia.org/wiki/Utah" +
entities.places.first.salience.must_be_close_to 0.069791436 + entities.places.first.mentions.count.must_equal 1 + entities.places.first.mentions.first.text.must_equal "Utah" + entities.places.first.mentions.first.offset.must_equal -1 + end +end diff --git a/google-cloud-language/test/google/cloud/language/annotation/entity_test.rb b/google-cloud-language/test/google/cloud/language/annotation/entity_test.rb new file mode 100644 index 000000000000..206f483d00fc --- /dev/null +++ b/google-cloud-language/test/google/cloud/language/annotation/entity_test.rb @@ -0,0 +1,56 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +require "helper" + +describe Google::Cloud::Language::Annotation::Entity do + let(:entity_hash) do + { name: "Utah", + type: "LOCATION", + metadata: { wikipedia_url: "http://en.wikipedia.org/wiki/Utah" }, + salience: 0.069791436, + mentions: [{ text: { content: "Utah", beginOffset: -1 } }] + } + end + let(:entity_json) { entity_hash.to_json } + let(:entity_grpc) { Google::Cloud::Language::V1beta1::Entity.decode_json entity_json } + let(:entity) { Google::Cloud::Language::Annotation::Entity.from_grpc entity_grpc } + + it "has attributes" do + entity.must_be_kind_of Google::Cloud::Language::Annotation::Entity + entity.name.must_equal "Utah" + entity.type.must_equal :LOCATION + entity.metadata.must_be_kind_of Hash + entity.wikipedia_url.must_equal "http://en.wikipedia.org/wiki/Utah" + entity.salience.must_be_close_to 0.069791436 + entity.mentions.must_be_kind_of Array + entity.mentions.count.must_equal 1 + entity.mentions.first.must_be_kind_of Google::Cloud::Language::Annotation::TextSpan + entity.mentions.first.text.must_equal "Utah" + entity.mentions.first.offset.must_equal -1 + end + + it "has helper methods" do + entity.must_be :location? + entity.must_be :place? + + entity.wont_be :unknown? + entity.wont_be :person? + entity.wont_be :organization? + entity.wont_be :event? + entity.wont_be :artwork? + entity.wont_be :good? + entity.wont_be :other? 
+ end +end From 6e79aee91431fcfdaf7ea06f4b3ec487c79ee342 Mon Sep 17 00:00:00 2001 From: Mike Moore Date: Tue, 23 Aug 2016 09:00:02 -0600 Subject: [PATCH 5/9] Add Language Sentiment tests --- .../language/annotation/sentiment_test.rb | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 google-cloud-language/test/google/cloud/language/annotation/sentiment_test.rb diff --git a/google-cloud-language/test/google/cloud/language/annotation/sentiment_test.rb b/google-cloud-language/test/google/cloud/language/annotation/sentiment_test.rb new file mode 100644 index 000000000000..062f54dc1315 --- /dev/null +++ b/google-cloud-language/test/google/cloud/language/annotation/sentiment_test.rb @@ -0,0 +1,35 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +require "helper" + +describe Google::Cloud::Language::Annotation::Sentiment do + let(:sentiment_hash) do + { + documentSentiment: { polarity: 1, magnitude: 2.0999999 }, + language: "en" + } + end + let(:sentiment_json) { sentiment_hash.to_json } + let(:sentiment_grpc) { Google::Cloud::Language::V1beta1::AnalyzeSentimentResponse.decode_json sentiment_json } + let(:sentiment) { Google::Cloud::Language::Annotation::Sentiment.from_grpc sentiment_grpc } + + it "has attributes" do + sentiment.must_be_kind_of Google::Cloud::Language::Annotation::Sentiment + + sentiment.language.must_equal "en" + sentiment.polarity.must_equal 1.0 + sentiment.magnitude.must_equal 2.0999999046325684 + end +end From e5365890de59c51978d15b6d64c8c9b51b0697e2 Mon Sep 17 00:00:00 2001 From: Mike Moore Date: Tue, 23 Aug 2016 09:00:31 -0600 Subject: [PATCH 6/9] Add Language Annotation tests --- .../google/cloud/language/annotation_test.rb | 111 ++++++ google-cloud-language/test/helper.rb | 7 + google-cloud-language/test/html.json | 358 +++++++++++++++++ google-cloud-language/test/text.json | 369 ++++++++++++++++++ 4 files changed, 845 insertions(+) create mode 100644 google-cloud-language/test/google/cloud/language/annotation_test.rb create mode 100644 google-cloud-language/test/html.json create mode 100644 google-cloud-language/test/text.json diff --git a/google-cloud-language/test/google/cloud/language/annotation_test.rb b/google-cloud-language/test/google/cloud/language/annotation_test.rb new file mode 100644 index 000000000000..2f8aea5861ea --- /dev/null +++ b/google-cloud-language/test/google/cloud/language/annotation_test.rb @@ -0,0 +1,111 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "helper" + +describe Google::Cloud::Language::Annotation, :mock_language do + let(:text_annotation_grpc) { Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json } + let(:html_annotation_grpc) { Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json } + let(:annotation) { Google::Cloud::Language::Annotation.from_grpc annotation_grpc } + + it "represents a plain text annotation response" do + annotation = Google::Cloud::Language::Annotation.from_grpc text_annotation_grpc + annotation.must_be_kind_of Google::Cloud::Language::Annotation + + annotation.language.must_equal "en" + + annotation.sentiment.must_be_kind_of Google::Cloud::Language::Annotation::Sentiment + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_equal 1.0 + annotation.sentiment.magnitude.must_equal 2.0999999046325684 + + annotation.entities.must_be_kind_of ::Array + annotation.entities.class.must_equal Google::Cloud::Language::Annotation::Entities + annotation.entities.each do |entity| + entity.must_be_kind_of Google::Cloud::Language::Annotation::Entity + end + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] 
+ annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.each do |sentence| + sentence.must_be_kind_of Google::Cloud::Language::Annotation::TextSpan + end + annotation.sentences.map(&:text).must_equal text_sentences + + annotation.tokens.each do |token| + token.must_be_kind_of Google::Cloud::Language::Annotation::Token + end + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end + + it "represents an html annotation response" do + annotation = Google::Cloud::Language::Annotation.from_grpc html_annotation_grpc + annotation.must_be_kind_of Google::Cloud::Language::Annotation + + annotation.language.must_equal "en" + + annotation.sentiment.must_be_kind_of Google::Cloud::Language::Annotation::Sentiment + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.must_be_kind_of ::Array + annotation.entities.class.must_equal Google::Cloud::Language::Annotation::Entities + annotation.entities.each do |entity| + entity.must_be_kind_of Google::Cloud::Language::Annotation::Entity + end + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + 
annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.each do |sentence| + sentence.must_be_kind_of Google::Cloud::Language::Annotation::TextSpan + end + annotation.sentences.map(&:text).must_equal html_sentences + + annotation.tokens.each do |token| + token.must_be_kind_of Google::Cloud::Language::Annotation::Token + end + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end +end diff --git a/google-cloud-language/test/helper.rb b/google-cloud-language/test/helper.rb index 7cbd9dd42530..4c48680ec8c1 100644 --- a/google-cloud-language/test/helper.rb +++ b/google-cloud-language/test/helper.rb @@ -26,6 +26,13 @@ class MockLanguage < Minitest::Spec let(:credentials) { OpenStruct.new(client: OpenStruct.new(updater_proc: Proc.new {})) } let(:language) { Google::Cloud::Language::Project.new(Google::Cloud::Language::Service.new(project, credentials)) } + let(:text_content) { "Hello from Chris and Mike! If you find yourself in Utah, come say hi! We love ruby and writing code." } + let(:text_sentences) { ["Hello from Chris and Mike!", "If you find yourself in Utah, come say hi!", "We love ruby and writing code."] } + let(:text_json) { File.read(File.dirname(__FILE__) + "/text.json") } + let(:html_content) { "Hello from Chris and Mike!

If you find yourself in Utah, come say hi!

We love ruby and writing code.

" } + let(:html_sentences) { ["Hello from Chris and Mike!", "If you find yourself in Utah, come say hi!", "We love ruby and writing code."] } + let(:html_json) { File.read(File.dirname(__FILE__) + "/html.json") } + # Register this spec type for when :language is used. register_spec_type(self) do |desc, *addl| addl.include? :mock_language diff --git a/google-cloud-language/test/html.json b/google-cloud-language/test/html.json new file mode 100644 index 000000000000..8bc80622e3b6 --- /dev/null +++ b/google-cloud-language/test/html.json @@ -0,0 +1,358 @@ +{ + "sentences": [{ + "text": { + "content": "Hello from Chris and Mike!", + "beginOffset": -1 + } + }, { + "text": { + "content": "If you find yourself in Utah, come say hi!", + "beginOffset": -1 + } + }, { + "text": { + "content": "We love ruby and writing code.", + "beginOffset": -1 + } + }], + "tokens": [{ + "text": { + "content": "Hello", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "X" + }, + "dependencyEdge": { + "label": "ROOT" + }, + "lemma": "Hello" + }, { + "text": { + "content": "from", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "ADP" + }, + "dependencyEdge": { + "label": "PREP" + }, + "lemma": "from" + }, { + "text": { + "content": "Chris", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "NOUN" + }, + "dependencyEdge": { + "headTokenIndex": 1, + "label": "POBJ" + }, + "lemma": "Chris" + }, { + "text": { + "content": "and", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "CONJ" + }, + "dependencyEdge": { + "headTokenIndex": 2, + "label": "CC" + }, + "lemma": "and" + }, { + "text": { + "content": "Mike", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "NOUN" + }, + "dependencyEdge": { + "headTokenIndex": 2, + "label": "CONJ" + }, + "lemma": "Mike" + }, { + "text": { + "content": "!", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "PUNCT" + }, + "dependencyEdge": { + "label": "P" + }, + "lemma": "!" 
+ }, { + "text": { + "content": "If", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "ADP" + }, + "dependencyEdge": { + "headTokenIndex": 8, + "label": "MARK" + }, + "lemma": "If" + }, { + "text": { + "content": "you", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "PRON" + }, + "dependencyEdge": { + "headTokenIndex": 8, + "label": "NSUBJ" + }, + "lemma": "you" + }, { + "text": { + "content": "find", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "VERB" + }, + "dependencyEdge": { + "headTokenIndex": 13, + "label": "ADVCL" + }, + "lemma": "find" + }, { + "text": { + "content": "yourself", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "PRON" + }, + "dependencyEdge": { + "headTokenIndex": 8, + "label": "DOBJ" + }, + "lemma": "yourself" + }, { + "text": { + "content": "in", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "ADP" + }, + "dependencyEdge": { + "headTokenIndex": 8, + "label": "PREP" + }, + "lemma": "in" + }, { + "text": { + "content": "Utah", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "NOUN" + }, + "dependencyEdge": { + "headTokenIndex": 10, + "label": "POBJ" + }, + "lemma": "Utah" + }, { + "text": { + "content": ",", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "PUNCT" + }, + "dependencyEdge": { + "headTokenIndex": 13, + "label": "P" + }, + "lemma": "," + }, { + "text": { + "content": "come", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "VERB" + }, + "dependencyEdge": { + "headTokenIndex": 13, + "label": "ROOT" + }, + "lemma": "come" + }, { + "text": { + "content": "say", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "X" + }, + "dependencyEdge": { + "headTokenIndex": 13, + "label": "P" + }, + "lemma": "say" + }, { + "text": { + "content": "hi", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "X" + }, + "dependencyEdge": { + "headTokenIndex": 13, + "label": "DISCOURSE" + }, + "lemma": "hi" + }, { + "text": { + "content": "!", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": 
"PUNCT" + }, + "dependencyEdge": { + "headTokenIndex": 13, + "label": "P" + }, + "lemma": "!" + }, { + "text": { + "content": "We", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "PRON" + }, + "dependencyEdge": { + "headTokenIndex": 18, + "label": "NSUBJ" + }, + "lemma": "We" + }, { + "text": { + "content": "love", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "VERB" + }, + "dependencyEdge": { + "headTokenIndex": 18, + "label": "ROOT" + }, + "lemma": "love" + }, { + "text": { + "content": "ruby", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "NOUN" + }, + "dependencyEdge": { + "headTokenIndex": 18, + "label": "DOBJ" + }, + "lemma": "ruby" + }, { + "text": { + "content": "and", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "CONJ" + }, + "dependencyEdge": { + "headTokenIndex": 18, + "label": "CC" + }, + "lemma": "and" + }, { + "text": { + "content": "writing", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "VERB" + }, + "dependencyEdge": { + "headTokenIndex": 18, + "label": "CONJ" + }, + "lemma": "write" + }, { + "text": { + "content": "code", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "NOUN" + }, + "dependencyEdge": { + "headTokenIndex": 21, + "label": "DOBJ" + }, + "lemma": "code" + }, { + "text": { + "content": ".", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "PUNCT" + }, + "dependencyEdge": { + "headTokenIndex": 18, + "label": "P" + }, + "lemma": "." 
+ }], + "entities": [{ + "name": "chris", + "type": "PERSON", + "metadata": {}, + "salience": 0.45453781, + "mentions": [{ + "text": { + "content": "Chris", + "beginOffset": -1 + } + }] + }, { + "name": "utah", + "type": "LOCATION", + "metadata": { + "wikipedia_url": "http://en.wikipedia.org/wiki/Utah" + }, + "salience": 0.061737571, + "mentions": [{ + "text": { + "content": "Utah", + "beginOffset": -1 + } + }] + }], + "documentSentiment": { + "polarity": 1, + "magnitude": 1.9 + }, + "language": "en" +} diff --git a/google-cloud-language/test/text.json b/google-cloud-language/test/text.json new file mode 100644 index 000000000000..d233ef6defa3 --- /dev/null +++ b/google-cloud-language/test/text.json @@ -0,0 +1,369 @@ +{ + "sentences": [{ + "text": { + "content": "Hello from Chris and Mike!", + "beginOffset": -1 + } + }, { + "text": { + "content": "If you find yourself in Utah, come say hi!", + "beginOffset": -1 + } + }, { + "text": { + "content": "We love ruby and writing code.", + "beginOffset": -1 + } + }], + "tokens": [{ + "text": { + "content": "Hello", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "X" + }, + "dependencyEdge": { + "label": "ROOT" + }, + "lemma": "Hello" + }, { + "text": { + "content": "from", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "ADP" + }, + "dependencyEdge": { + "label": "PREP" + }, + "lemma": "from" + }, { + "text": { + "content": "Chris", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "NOUN" + }, + "dependencyEdge": { + "headTokenIndex": 1, + "label": "POBJ" + }, + "lemma": "Chris" + }, { + "text": { + "content": "and", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "CONJ" + }, + "dependencyEdge": { + "headTokenIndex": 2, + "label": "CC" + }, + "lemma": "and" + }, { + "text": { + "content": "Mike", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "NOUN" + }, + "dependencyEdge": { + "headTokenIndex": 2, + "label": "CONJ" + }, + "lemma": "Mike" + }, { + "text": { + "content": "!", + "beginOffset": -1 
+ }, + "partOfSpeech": { + "tag": "PUNCT" + }, + "dependencyEdge": { + "label": "P" + }, + "lemma": "!" + }, { + "text": { + "content": "If", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "ADP" + }, + "dependencyEdge": { + "headTokenIndex": 8, + "label": "MARK" + }, + "lemma": "If" + }, { + "text": { + "content": "you", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "PRON" + }, + "dependencyEdge": { + "headTokenIndex": 8, + "label": "NSUBJ" + }, + "lemma": "you" + }, { + "text": { + "content": "find", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "VERB" + }, + "dependencyEdge": { + "headTokenIndex": 13, + "label": "ADVCL" + }, + "lemma": "find" + }, { + "text": { + "content": "yourself", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "PRON" + }, + "dependencyEdge": { + "headTokenIndex": 8, + "label": "DOBJ" + }, + "lemma": "yourself" + }, { + "text": { + "content": "in", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "ADP" + }, + "dependencyEdge": { + "headTokenIndex": 8, + "label": "PREP" + }, + "lemma": "in" + }, { + "text": { + "content": "Utah", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "NOUN" + }, + "dependencyEdge": { + "headTokenIndex": 10, + "label": "POBJ" + }, + "lemma": "Utah" + }, { + "text": { + "content": ",", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "PUNCT" + }, + "dependencyEdge": { + "headTokenIndex": 13, + "label": "P" + }, + "lemma": "," + }, { + "text": { + "content": "come", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "VERB" + }, + "dependencyEdge": { + "headTokenIndex": 13, + "label": "ROOT" + }, + "lemma": "come" + }, { + "text": { + "content": "say", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "X" + }, + "dependencyEdge": { + "headTokenIndex": 13, + "label": "P" + }, + "lemma": "say" + }, { + "text": { + "content": "hi", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "X" + }, + "dependencyEdge": { + "headTokenIndex": 13, + "label": "DISCOURSE" + }, + 
"lemma": "hi" + }, { + "text": { + "content": "!", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "PUNCT" + }, + "dependencyEdge": { + "headTokenIndex": 13, + "label": "P" + }, + "lemma": "!" + }, { + "text": { + "content": "We", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "PRON" + }, + "dependencyEdge": { + "headTokenIndex": 18, + "label": "NSUBJ" + }, + "lemma": "We" + }, { + "text": { + "content": "love", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "VERB" + }, + "dependencyEdge": { + "headTokenIndex": 18, + "label": "ROOT" + }, + "lemma": "love" + }, { + "text": { + "content": "ruby", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "NOUN" + }, + "dependencyEdge": { + "headTokenIndex": 18, + "label": "DOBJ" + }, + "lemma": "ruby" + }, { + "text": { + "content": "and", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "CONJ" + }, + "dependencyEdge": { + "headTokenIndex": 18, + "label": "CC" + }, + "lemma": "and" + }, { + "text": { + "content": "writing", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "VERB" + }, + "dependencyEdge": { + "headTokenIndex": 18, + "label": "CONJ" + }, + "lemma": "write" + }, { + "text": { + "content": "code", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "NOUN" + }, + "dependencyEdge": { + "headTokenIndex": 21, + "label": "DOBJ" + }, + "lemma": "code" + }, { + "text": { + "content": ".", + "beginOffset": -1 + }, + "partOfSpeech": { + "tag": "PUNCT" + }, + "dependencyEdge": { + "headTokenIndex": 18, + "label": "P" + }, + "lemma": "." 
+ }], + "entities": [{ + "name": "Chris", + "type": "PERSON", + "metadata": {}, + "salience": 0.5138337, + "mentions": [{ + "text": { + "content": "Chris", + "beginOffset": -1 + } + }] + }, { + "name": "Mike", + "type": "PERSON", + "metadata": {}, + "salience": 0.1997266, + "mentions": [{ + "text": { + "content": "Mike", + "beginOffset": -1 + } + }] + }, { + "name": "Utah", + "type": "LOCATION", + "metadata": { + "wikipedia_url": "http://en.wikipedia.org/wiki/Utah" + }, + "salience": 0.069791436, + "mentions": [{ + "text": { + "content": "Utah", + "beginOffset": -1 + } + }] + }], + "documentSentiment": { + "polarity": 1, + "magnitude": 2.0999999 + }, + "language": "en" +} From 0766f22871bb4f594c4b4083d0d592ec57321939 Mon Sep 17 00:00:00 2001 From: Mike Moore Date: Tue, 23 Aug 2016 09:29:02 -0600 Subject: [PATCH 7/9] Add Language Token tests --- .../cloud/language/annotation/token_test.rb | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 google-cloud-language/test/google/cloud/language/annotation/token_test.rb diff --git a/google-cloud-language/test/google/cloud/language/annotation/token_test.rb b/google-cloud-language/test/google/cloud/language/annotation/token_test.rb new file mode 100644 index 000000000000..374aa6f5f7bb --- /dev/null +++ b/google-cloud-language/test/google/cloud/language/annotation/token_test.rb @@ -0,0 +1,46 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +require "helper" + +describe Google::Cloud::Language::Annotation::Token do + let(:token_hash) do + { + text: { + content: "Hello", + beginOffset: -1 + }, + partOfSpeech: { + tag: "X" + }, + dependencyEdge: { + label: "ROOT" + }, + lemma: "Hello" + } + end + let(:token_json) { token_hash.to_json } + let(:token_grpc) { Google::Cloud::Language::V1beta1::Token.decode_json token_json } + let(:token) { Google::Cloud::Language::Annotation::Token.from_grpc token_grpc } + + it "has attributes" do + token.must_be_kind_of Google::Cloud::Language::Annotation::Token + + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end +end From ef4a5448a764eeed5f8bdd5fa9824ca2bb4a8d45 Mon Sep 17 00:00:00 2001 From: Mike Moore Date: Tue, 23 Aug 2016 09:33:29 -0600 Subject: [PATCH 8/9] Add Language TextSpan tests --- .../language/annotation/text_span_test.rb | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 google-cloud-language/test/google/cloud/language/annotation/text_span_test.rb diff --git a/google-cloud-language/test/google/cloud/language/annotation/text_span_test.rb b/google-cloud-language/test/google/cloud/language/annotation/text_span_test.rb new file mode 100644 index 000000000000..2cf71cac22e8 --- /dev/null +++ b/google-cloud-language/test/google/cloud/language/annotation/text_span_test.rb @@ -0,0 +1,37 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +require "helper" + +describe Google::Cloud::Language::Annotation::TextSpan do + let(:text_span_hash) do + { + "content": "Hello world!", + "beginOffset": -1 + } + end + let(:text_span_json) { text_span_hash.to_json } + let(:text_span_grpc) { Google::Cloud::Language::V1beta1::TextSpan.decode_json text_span_json } + let(:text_span) { Google::Cloud::Language::Annotation::TextSpan.from_grpc text_span_grpc } + + it "has attributes" do + text_span.must_be_kind_of Google::Cloud::Language::Annotation::TextSpan + + text_span.text.must_equal "Hello world!" + text_span.content.must_equal "Hello world!" + + text_span.offset.must_equal -1 + text_span.begin_offset.must_equal -1 + end +end From 6a61d0c55e3e2280bae0a25cb599fe87bde8913a Mon Sep 17 00:00:00 2001 From: Mike Moore Date: Tue, 23 Aug 2016 10:34:14 -0600 Subject: [PATCH 9/9] Add partial unit test coverage for annotate --- .../lib/google/cloud/language/document.rb | 25 +- .../cloud/language/annotation/entity_test.rb | 2 +- .../language/annotation/text_span_test.rb | 6 +- .../cloud/language/annotation/token_test.rb | 2 +- .../document/full_html_annotation_test.rb | 96 +++ .../document/full_text_annotation_test.rb | 120 ++++ .../google/cloud/language/document_test.rb | 115 ++++ .../cloud/language/project/document_test.rb | 112 ++++ .../project/full_html_annotation_test.rb | 473 ++++++++++++++ .../project/full_text_annotation_test.rb | 602 ++++++++++++++++++ .../google/cloud/language/project_test.rb | 22 + 11 files changed, 1561 insertions(+), 14 deletions(-) create mode 100644 google-cloud-language/test/google/cloud/language/document/full_html_annotation_test.rb create mode 100644 google-cloud-language/test/google/cloud/language/document/full_text_annotation_test.rb create mode 100644 google-cloud-language/test/google/cloud/language/document_test.rb create mode 100644 
google-cloud-language/test/google/cloud/language/project/document_test.rb create mode 100644 google-cloud-language/test/google/cloud/language/project/full_html_annotation_test.rb create mode 100644 google-cloud-language/test/google/cloud/language/project/full_text_annotation_test.rb create mode 100644 google-cloud-language/test/google/cloud/language/project_test.rb diff --git a/google-cloud-language/lib/google/cloud/language/document.rb b/google-cloud-language/lib/google/cloud/language/document.rb index b14719b02110..943c5f514129 100644 --- a/google-cloud-language/lib/google/cloud/language/document.rb +++ b/google-cloud-language/lib/google/cloud/language/document.rb @@ -39,6 +39,10 @@ module Language # annotation.thing #=> Some Result # class Document + ## + # @private The gRPC Service object. + attr_accessor :service + ## # @private Creates a new Document instance. def initialize @@ -76,6 +80,9 @@ def format return :html if html? end + ## + # Update the Document's format. Accepted values are `:text` or `:html`. + # def format= new_format @grpc.type = :PLAIN_TEXT if new_format.to_s == "text" @grpc.type = :HTML if new_format.to_s == "html" @@ -118,11 +125,11 @@ def language end ## - # The Document's language. + # Update the Document's language. ISO and BCP-47 language codes are + # accepted. # def language= new_language - new_language = new_language.to_s unless new_language.nil? - @grpc.language = new_language + @grpc.language = new_language.to_s end ## @@ -153,9 +160,9 @@ def language= new_language def annotate text: false, entities: false, sentiment: false, encoding: nil ensure_service! 
- grpc = @service.annotate to_grpc, text: text, entities: entities, - sentiment: sentiment, - encoding: encoding + grpc = service.annotate to_grpc, text: text, entities: entities, + sentiment: sentiment, + encoding: encoding Annotation.from_grpc grpc end alias_method :mark, :annotate @@ -182,7 +189,7 @@ def annotate text: false, entities: false, sentiment: false, # def entities encoding: nil ensure_service! - grpc = @service.entities to_grpc, encoding: encoding + grpc = service.entities to_grpc, encoding: encoding Annotation::Entities.from_grpc grpc end @@ -206,7 +213,7 @@ def entities encoding: nil # def sentiment ensure_service! - grpc = @service.sentiment to_grpc + grpc = service.sentiment to_grpc Annotation::Sentiment.from_grpc grpc end @@ -250,7 +257,7 @@ def self.from_source source, service, format: nil, language: nil ## # Raise an error unless an active language project object is available. def ensure_service! - fail "Must have active connection" unless @service + fail "Must have active connection" unless service end end end diff --git a/google-cloud-language/test/google/cloud/language/annotation/entity_test.rb b/google-cloud-language/test/google/cloud/language/annotation/entity_test.rb index 206f483d00fc..5ef46b89aa3b 100644 --- a/google-cloud-language/test/google/cloud/language/annotation/entity_test.rb +++ b/google-cloud-language/test/google/cloud/language/annotation/entity_test.rb @@ -24,7 +24,7 @@ } end let(:entity_json) { entity_hash.to_json } - let(:entity_grpc) { Google::Cloud::Language::V1beta1::Entity.decode_json entity_json } + let(:entity_grpc) { Google::Cloud::Language::V1beta1::Entity.decode_json entity_json } let(:entity) { Google::Cloud::Language::Annotation::Entity.from_grpc entity_grpc } it "has attributes" do diff --git a/google-cloud-language/test/google/cloud/language/annotation/text_span_test.rb b/google-cloud-language/test/google/cloud/language/annotation/text_span_test.rb index 2cf71cac22e8..d0bbbc06189c 100644 --- 
a/google-cloud-language/test/google/cloud/language/annotation/text_span_test.rb +++ b/google-cloud-language/test/google/cloud/language/annotation/text_span_test.rb @@ -17,12 +17,12 @@ describe Google::Cloud::Language::Annotation::TextSpan do let(:text_span_hash) do { - "content": "Hello world!", - "beginOffset": -1 + content: "Hello world!", + beginOffset: -1 } end let(:text_span_json) { text_span_hash.to_json } - let(:text_span_grpc) { Google::Cloud::Language::V1beta1::TextSpan.decode_json text_span_json } + let(:text_span_grpc) { Google::Cloud::Language::V1beta1::TextSpan.decode_json text_span_json } let(:text_span) { Google::Cloud::Language::Annotation::TextSpan.from_grpc text_span_grpc } it "has attributes" do diff --git a/google-cloud-language/test/google/cloud/language/annotation/token_test.rb b/google-cloud-language/test/google/cloud/language/annotation/token_test.rb index 374aa6f5f7bb..a42b90ece3ed 100644 --- a/google-cloud-language/test/google/cloud/language/annotation/token_test.rb +++ b/google-cloud-language/test/google/cloud/language/annotation/token_test.rb @@ -31,7 +31,7 @@ } end let(:token_json) { token_hash.to_json } - let(:token_grpc) { Google::Cloud::Language::V1beta1::Token.decode_json token_json } + let(:token_grpc) { Google::Cloud::Language::V1beta1::Token.decode_json token_json } let(:token) { Google::Cloud::Language::Annotation::Token.from_grpc token_grpc } it "has attributes" do diff --git a/google-cloud-language/test/google/cloud/language/document/full_html_annotation_test.rb b/google-cloud-language/test/google/cloud/language/document/full_html_annotation_test.rb new file mode 100644 index 000000000000..b11d25bd23c7 --- /dev/null +++ b/google-cloud-language/test/google/cloud/language/document/full_html_annotation_test.rb @@ -0,0 +1,96 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "helper" + +describe Google::Cloud::Language::Document, :full_html_annotation, :mock_language do + let(:doc) { language.document html_content, format: :html } + + it "runs full annotation" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: html_content, type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + doc.service.mocked_service = mock + annotation = doc.annotate + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation with en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: html_content, type: :HTML, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + doc.service.mocked_service = mock + doc.language = "en" + annotation = doc.annotate + mock.verify + + assert_html_annotation annotation + end + + def assert_html_annotation annotation + annotation.language.must_equal "en" + + annotation.sentiment.must_be_kind_of 
Google::Cloud::Language::Annotation::Sentiment + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.must_be_kind_of ::Array + annotation.entities.class.must_equal Google::Cloud::Language::Annotation::Entities + annotation.entities.each do |entity| + entity.must_be_kind_of Google::Cloud::Language::Annotation::Entity + end + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.each do |sentence| + sentence.must_be_kind_of Google::Cloud::Language::Annotation::TextSpan + end + annotation.sentences.map(&:text).must_equal html_sentences + + annotation.tokens.each do |token| + token.must_be_kind_of Google::Cloud::Language::Annotation::Token + end + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end +end diff --git a/google-cloud-language/test/google/cloud/language/document/full_text_annotation_test.rb b/google-cloud-language/test/google/cloud/language/document/full_text_annotation_test.rb new file mode 100644 index 000000000000..787d43c4eb9d --- /dev/null +++ b/google-cloud-language/test/google/cloud/language/document/full_text_annotation_test.rb @@ -0,0 +1,120 @@ +# 
Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "helper" + +describe Google::Cloud::Language::Document, :full_text_annotation, :mock_language do + let(:doc) { language.document text_content } + + it "runs full annotation with content and empty options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + doc.service.mocked_service = mock + annotation = doc.annotate + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation with content and TEXT format options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + doc.service.mocked_service = mock + doc.text! 
+ annotation = doc.annotate + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation with content and TEXT format and en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + doc.service.mocked_service = mock + doc.text! + doc.language = :en + annotation = doc.annotate + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation with content and en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + doc.service.mocked_service = mock + doc.language = :en + annotation = doc.annotate + mock.verify + + assert_text_annotation annotation + end + + def assert_text_annotation annotation + annotation.language.must_equal "en" + + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_equal 1.0 + annotation.sentiment.magnitude.must_equal 2.0999999046325684 + + annotation.entities.count.must_equal 3 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"] + 
annotation.entities.locations.map(&:name).must_equal ["Utah"] + annotation.entities.places.map(&:name).must_equal ["Utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.map(&:text).must_equal text_sentences + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end +end diff --git a/google-cloud-language/test/google/cloud/language/document_test.rb b/google-cloud-language/test/google/cloud/language/document_test.rb new file mode 100644 index 000000000000..b181722f761c --- /dev/null +++ b/google-cloud-language/test/google/cloud/language/document_test.rb @@ -0,0 +1,115 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "helper" + +describe Google::Cloud::Language::Document, :mock_language do + it "can change formats" do + doc = language.document "Hello world!" + doc.must_be_kind_of Google::Cloud::Language::Document + + # It knows it is plain text and not HTML + doc.must_be :text? + doc.wont_be :html? 
+ doc.format.must_equal :text + + # It can change format to HTML + doc.format = "html" + doc.must_be :html? + doc.wont_be :text? + doc.format.must_equal :html + + # It can change back to plain text + doc.format = :text + doc.must_be :text? + doc.wont_be :html? + doc.format.must_equal :text + + # It can change format to HTML using the helper + doc.html! + doc.must_be :html? + doc.wont_be :text? + doc.format.must_equal :html + + # It can change back to plain text using the helper + doc.text! + doc.must_be :text? + doc.wont_be :html? + doc.format.must_equal :text + end + + it "knows if it is content vs. GCS URL" do + doc = language.document "Hello world!" + doc.must_be_kind_of Google::Cloud::Language::Document + + # These are private methods + doc.must_be :content? + doc.wont_be :url? + doc.must_be :text? + doc.wont_be :html? + end + + it "knows if it is a GCS URL vs. content" do + doc = language.document "gs://bucket/path.ext" + doc.must_be_kind_of Google::Cloud::Language::Document + + # These are private methods + doc.must_be :url? + doc.wont_be :content? + doc.must_be :text? + doc.wont_be :html? + end + + it "can set the HTML format for a GCS URL" do + doc = language.document "gs://bucket/path.ext", format: :html + doc.must_be_kind_of Google::Cloud::Language::Document + + # These are private methods + doc.must_be :url? + doc.wont_be :content? + doc.must_be :html? + doc.wont_be :text? + end + + it "can derrive the HTML format from a GCS URL ending in .html" do + doc = language.document "gs://bucket/path.html" + doc.must_be_kind_of Google::Cloud::Language::Document + + # These are private methods + doc.must_be :url? + doc.wont_be :content? + doc.must_be :html? + doc.wont_be :text? + end + + it "can change languages" do + doc = language.document "Hello world!" 
+ doc.must_be_kind_of Google::Cloud::Language::Document + + # The default language is an empty string + doc.language.must_equal "" + + # It can set language as a symbol + doc.language = :en + doc.language.must_equal "en" + + # It can set language as a string + doc.language = "jp" + doc.language.must_equal "jp" + + # It can set language to nil + doc.language = nil + doc.language.must_equal "" + end +end diff --git a/google-cloud-language/test/google/cloud/language/project/document_test.rb b/google-cloud-language/test/google/cloud/language/project/document_test.rb new file mode 100644 index 000000000000..e23979a62ea6 --- /dev/null +++ b/google-cloud-language/test/google/cloud/language/project/document_test.rb @@ -0,0 +1,112 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "helper" + +describe Google::Cloud::Language::Project, :document, :mock_language do + it "builds a document from content" do + doc = language.document "Hello world!" + doc.must_be_kind_of Google::Cloud::Language::Document + doc.must_be :content? # private method + doc.wont_be :url? # private method + doc.must_be :text? # private method + doc.wont_be :html? # private method + doc.source.must_equal "Hello world!" # private method + end + + it "builds a document from URL" do + doc = language.document "gs://bucket/path.txt" + doc.must_be_kind_of Google::Cloud::Language::Document + doc.must_be :url? # private method + doc.wont_be :content? 
# private method + doc.must_be :text? # private method + doc.wont_be :html? # private method + doc.source.must_equal "gs://bucket/path.txt" # private method + end + + it "builds a document from Storage File object" do + fake = OpenStruct.new to_gs_url: "gs://bucket/path.txt" + doc = language.document fake + doc.must_be_kind_of Google::Cloud::Language::Document + doc.must_be :url? # private method + doc.wont_be :content? # private method + doc.must_be :text? # private method + doc.wont_be :html? # private method + doc.source.must_equal "gs://bucket/path.txt" # private method + end + + it "sets the format of the URL when provided" do + doc = language.document "gs://bucket/path.ext", format: :html + doc.must_be_kind_of Google::Cloud::Language::Document + doc.must_be :url? # private method + doc.wont_be :content? # private method + doc.must_be :html? # private method + doc.wont_be :text? # private method + doc.source.must_equal "gs://bucket/path.ext" # private method + end + + it "derives HTML format if the URL ends in .html" do + doc = language.document "gs://bucket/path.html" + doc.must_be_kind_of Google::Cloud::Language::Document + doc.must_be :url? # private method + doc.wont_be :content? # private method + doc.must_be :html? # private method + doc.wont_be :text? # private method + doc.source.must_equal "gs://bucket/path.html" # private method + end + + it "builds a document from another document, while maintaining formats" do + doc1 = language.document "Hello world!", format: :html + doc1.must_be_kind_of Google::Cloud::Language::Document + doc1.must_be :html? + doc1.source.must_equal "Hello world!" # private method + + doc2 = language.document doc1 + doc2.must_be_kind_of Google::Cloud::Language::Document + doc2.must_be :html? + doc2.source.must_equal "Hello world!" 
# private method + end + + it "builds a document from another document, while switching formats" do + doc1 = language.document "Hello world!", format: :text + doc1.must_be_kind_of Google::Cloud::Language::Document + doc1.must_be :text? + doc1.source.must_equal "Hello world!" # private method + + doc2 = language.document doc1, format: :html + doc2.must_be_kind_of Google::Cloud::Language::Document + doc2.must_be :html? + doc2.source.must_equal "Hello world!" # private method + end + + it "builds a document from another document, while maintaining language" do + doc1 = language.document "Hello world!", language: :en + doc1.must_be_kind_of Google::Cloud::Language::Document + doc1.language.must_equal "en" + + doc2 = language.document doc1 + doc2.must_be_kind_of Google::Cloud::Language::Document + doc2.language.must_equal "en" + end + + it "builds a document from another document, while switching languages" do + doc1 = language.document "Hello world!", language: :en + doc1.must_be_kind_of Google::Cloud::Language::Document + doc1.language.must_equal "en" + + doc2 = language.document doc1, language: :jp + doc2.must_be_kind_of Google::Cloud::Language::Document + doc2.language.must_equal "jp" + end +end diff --git a/google-cloud-language/test/google/cloud/language/project/full_html_annotation_test.rb b/google-cloud-language/test/google/cloud/language/project/full_html_annotation_test.rb new file mode 100644 index 000000000000..76be2d65c99c --- /dev/null +++ b/google-cloud-language/test/google/cloud/language/project/full_html_annotation_test.rb @@ -0,0 +1,473 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "helper" + +describe Google::Cloud::Language::Project, :full_html_annotation, :mock_language do + describe "inline content" do + it "runs full annotation with content and HTML format options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: html_content, type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate html_content, format: :html + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation with content and HTML format and en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: html_content, type: :HTML, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate html_content, format: :html, language: :en + mock.verify + + assert_html_annotation annotation + end + end + + describe "document 
object" do + it "runs full annotation with document object using HTML format options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: html_content, type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.document html_content, format: :html + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation with document object using HTML format and en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: html_content, type: :HTML, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.document html_content, format: :html, language: :en + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + + describe "using #html helper" do + it "runs full annotation using empty options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: html_content, type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect 
:annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.html html_content + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation using en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: html_content, type: :HTML, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.html html_content, language: :en + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + end + end + + describe "inline URL" do + it "runs full annotation with content and HTML format options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate "gs://bucket/path.ext", format: :html + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation with content and HTML format and en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :HTML, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + 
extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate "gs://bucket/path.ext", format: :html, language: :en + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation with content deriving HTML format from URL" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.html", type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate "gs://bucket/path.html" + mock.verify + + assert_html_annotation annotation + end + end + + describe "document URL" do + it "runs full annotation with document object using HTML format options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.document "gs://bucket/path.ext", format: :html + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation with document 
object using HTML format and en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :HTML, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.document "gs://bucket/path.ext", format: :html, language: :en + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation with document object deriving HTML format from URL" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.html", type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.document "gs://bucket/path.html" + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + + describe "using #html helper" do + it "runs full annotation using empty options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + 
mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.html "gs://bucket/path.ext" + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation using en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :HTML, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.html "gs://bucket/path.ext", language: :en + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + end + end + + describe "inline GCS object" do + it "runs full annotation with content and HTML format options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext" + annotation = language.annotate gcs_fake, format: :html + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation with content and HTML format and en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: 
:HTML, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext" + annotation = language.annotate gcs_fake, format: :html, language: :en + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation with content deriving HTML format from URL" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.html", type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.html" + annotation = language.annotate gcs_fake + mock.verify + + assert_html_annotation annotation + end + end + + describe "document GCS object" do + it "runs full annotation with document object using HTML format options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = 
mock + gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext" + doc = language.document gcs_fake, format: :html + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation with document object using HTML format and en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :HTML, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext" + doc = language.document gcs_fake, format: :html, language: :en + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation with document object deriving HTML format from URL" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.html", type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.html" + doc = language.document gcs_fake + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + + describe "using #html helper" do + it "runs full annotation using empty options" do + grpc_doc = 
Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :HTML) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext" + doc = language.html gcs_fake + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + + it "runs full annotation using en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :HTML, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json html_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext" + doc = language.html gcs_fake, language: :en + annotation = language.annotate doc + mock.verify + + assert_html_annotation annotation + end + end + end + + def assert_html_annotation annotation + annotation.language.must_equal "en" + + annotation.sentiment.must_be_kind_of Google::Cloud::Language::Annotation::Sentiment + annotation.sentiment.language.must_equal "en" + annotation.sentiment.polarity.must_be_close_to 1.0 + annotation.sentiment.magnitude.must_be_close_to 1.899999976158142 + + annotation.entities.must_be_kind_of ::Array + annotation.entities.class.must_equal Google::Cloud::Language::Annotation::Entities + annotation.entities.each do 
|entity| + entity.must_be_kind_of Google::Cloud::Language::Annotation::Entity + end + annotation.entities.count.must_equal 2 + annotation.entities.language.must_equal "en" + annotation.entities.unknown.map(&:name).must_equal [] + annotation.entities.people.map(&:name).must_equal ["chris"] + annotation.entities.locations.map(&:name).must_equal ["utah"] + annotation.entities.places.map(&:name).must_equal ["utah"] + annotation.entities.organizations.map(&:name).must_equal [] + annotation.entities.events.map(&:name).must_equal [] + annotation.entities.artwork.map(&:name).must_equal [] + annotation.entities.goods.map(&:name).must_equal [] + annotation.entities.other.map(&:name).must_equal [] + + annotation.sentences.each do |sentence| + sentence.must_be_kind_of Google::Cloud::Language::Annotation::TextSpan + end + annotation.sentences.map(&:text).must_equal html_sentences + + annotation.tokens.each do |token| + token.must_be_kind_of Google::Cloud::Language::Annotation::Token + end + annotation.tokens.count.must_equal 24 + token = annotation.tokens.first + token.text.must_equal "Hello" + token.part_of_speech.must_equal :X + token.head_token_index.must_equal 0 + token.label.must_equal :ROOT + token.lemma.must_equal "Hello" + end +end diff --git a/google-cloud-language/test/google/cloud/language/project/full_text_annotation_test.rb b/google-cloud-language/test/google/cloud/language/project/full_text_annotation_test.rb new file mode 100644 index 000000000000..59eadffd96c8 --- /dev/null +++ b/google-cloud-language/test/google/cloud/language/project/full_text_annotation_test.rb @@ -0,0 +1,602 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "helper" + +describe Google::Cloud::Language::Project, :full_text_annotation, :mock_language do + describe "inline content" do + it "runs full annotation with content and empty options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate text_content + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation with content and TEXT format options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate text_content, format: :text + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation with content and TEXT format and en language options" do 
+ grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate text_content, format: :text, language: :en + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation with content and en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate text_content, language: :en + mock.verify + + assert_text_annotation annotation + end + end + + describe "document object" do + it "runs full annotation with document object using empty options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.document 
text_content + annotation = language.annotate doc + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation with document object using TEXT format options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.document text_content, format: :text + annotation = language.annotate doc + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation with document object using TEXT format and en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.document text_content, format: :text, language: :en + annotation = language.annotate doc + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation with document object using en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = 
Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.document text_content, language: :en + annotation = language.annotate doc + mock.verify + + assert_text_annotation annotation + end + + describe "using #text helper" do + it "runs full annotation using empty options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.text text_content + annotation = language.annotate doc + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation using en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + content: text_content, type: :PLAIN_TEXT, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.text text_content, language: :en + annotation = language.annotate doc + mock.verify + + assert_text_annotation annotation + end + end + end + + describe "inline URL" do + it "runs full annotation with content and empty options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: 
"gs://bucket/path.ext", type: :PLAIN_TEXT) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate "gs://bucket/path.ext" + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation with content and TEXT format options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate "gs://bucket/path.ext", format: :text + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation with content and TEXT format and en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate "gs://bucket/path.ext", format: :text, language: :en + mock.verify + + 
assert_text_annotation annotation + end + + it "runs full annotation with content and en language options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT, language: "en") + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + annotation = language.annotate "gs://bucket/path.ext", language: :en + mock.verify + + assert_text_annotation annotation + end + end + + describe "document URL object" do + it "runs full annotation with document object using empty options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json + + mock = Minitest::Mock.new + mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8] + + language.service.mocked_service = mock + doc = language.document "gs://bucket/path.ext" + annotation = language.annotate doc + mock.verify + + assert_text_annotation annotation + end + + it "runs full annotation with document object using TEXT format options" do + grpc_doc = Google::Cloud::Language::V1beta1::Document.new( + gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT) + features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new( + extract_syntax: true, extract_entities: true, extract_document_sentiment: true) + grpc_resp = 
Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+      mock = Minitest::Mock.new
+      mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+      language.service.mocked_service = mock
+      doc = language.document "gs://bucket/path.ext", format: :text
+      annotation = language.annotate doc
+      mock.verify
+
+      assert_text_annotation annotation
+    end
+
+    it "runs full annotation with document object using TEXT format and en language options" do
+      grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+        gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT, language: "en")
+      features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+        extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+      grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+      mock = Minitest::Mock.new
+      mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+      language.service.mocked_service = mock
+      doc = language.document "gs://bucket/path.ext", format: :text, language: :en
+      annotation = language.annotate doc
+      mock.verify
+
+      assert_text_annotation annotation
+    end
+
+    it "runs full annotation with document object using en language options" do
+      grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+        gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT, language: "en")
+      features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+        extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+      grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+      mock = Minitest::Mock.new
+      mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+      language.service.mocked_service = mock
+      doc = language.document "gs://bucket/path.ext", language: :en
+      annotation = language.annotate doc
+      mock.verify
+
+      assert_text_annotation annotation
+    end
+
+    describe "using #text helper" do
+      it "runs full annotation using empty options" do
+        grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+          gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT)
+        features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+          extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+        grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+        mock = Minitest::Mock.new
+        mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+        language.service.mocked_service = mock
+        doc = language.text "gs://bucket/path.ext"
+        annotation = language.annotate doc
+        mock.verify
+
+        assert_text_annotation annotation
+      end
+
+      it "runs full annotation using en language options" do
+        grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+          gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT, language: "en")
+        features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+          extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+        grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+        mock = Minitest::Mock.new
+        mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+        language.service.mocked_service = mock
+        doc = language.text "gs://bucket/path.ext", language: :en
+        annotation = language.annotate doc
+        mock.verify
+
+        assert_text_annotation annotation
+      end
+    end
+  end
+
+  describe "inline GCS object" do
+    it "runs full annotation with content and empty options" do
+      grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+        gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT)
+      features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+        extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+      grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+      mock = Minitest::Mock.new
+      mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+      language.service.mocked_service = mock
+      gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext" # stand-in exposing only #to_gs_url
+      annotation = language.annotate gcs_fake
+      mock.verify
+
+      assert_text_annotation annotation
+    end
+
+    it "runs full annotation with content and TEXT format options" do
+      grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+        gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT)
+      features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+        extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+      grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+      mock = Minitest::Mock.new
+      mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+      language.service.mocked_service = mock
+      gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext"
+      annotation = language.annotate gcs_fake, format: :text
+      mock.verify
+
+      assert_text_annotation annotation
+    end
+
+    it "runs full annotation with content and TEXT format and en language options" do
+      grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+        gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT, language: "en")
+      features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+        extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+      grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+      mock = Minitest::Mock.new
+      mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+      language.service.mocked_service = mock
+      gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext"
+      annotation = language.annotate gcs_fake, format: :text, language: :en
+      mock.verify
+
+      assert_text_annotation annotation
+    end
+
+    it "runs full annotation with content and en language options" do
+      grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+        gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT, language: "en")
+      features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+        extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+      grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+      mock = Minitest::Mock.new
+      mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+      language.service.mocked_service = mock
+      gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext"
+      annotation = language.annotate gcs_fake, language: :en
+      mock.verify
+
+      assert_text_annotation annotation
+    end
+  end
+
+  describe "document GCS object" do
+    it "runs full annotation with document object using empty options" do
+      grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+        gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT)
+      features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+        extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+      grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+      mock = Minitest::Mock.new
+      mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+      language.service.mocked_service = mock
+      gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext"
+      doc = language.document gcs_fake
+      annotation = language.annotate doc
+      mock.verify
+
+      assert_text_annotation annotation
+    end
+
+    it "runs full annotation with document object using TEXT format options" do
+      grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+        gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT)
+      features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+        extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+      grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+      mock = Minitest::Mock.new
+      mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+      language.service.mocked_service = mock
+      gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext"
+      doc = language.document gcs_fake, format: :text
+      annotation = language.annotate doc
+      mock.verify
+
+      assert_text_annotation annotation
+    end
+
+    it "runs full annotation with document object using TEXT format and en language options" do
+      grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+        gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT, language: "en")
+      features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+        extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+      grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+      mock = Minitest::Mock.new
+      mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+      language.service.mocked_service = mock
+      gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext"
+      doc = language.document gcs_fake, format: :text, language: :en
+      annotation = language.annotate doc
+      mock.verify
+
+      assert_text_annotation annotation
+    end
+
+    it "runs full annotation with document object using en language options" do
+      grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+        gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT, language: "en")
+      features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+        extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+      grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+      mock = Minitest::Mock.new
+      mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+      language.service.mocked_service = mock
+      gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext"
+      doc = language.document gcs_fake, language: :en
+      annotation = language.annotate doc
+      mock.verify
+
+      assert_text_annotation annotation
+    end
+
+    describe "using #text helper" do
+      it "runs full annotation using empty options" do
+        grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+          gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT)
+        features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+          extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+        grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+        mock = Minitest::Mock.new
+        mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+        language.service.mocked_service = mock
+        gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext"
+        doc = language.text gcs_fake # go through #text, as the describe label promises
+        annotation = language.annotate doc
+        mock.verify
+
+        assert_text_annotation annotation
+      end
+
+      it "runs full annotation using en language options" do
+        grpc_doc = Google::Cloud::Language::V1beta1::Document.new(
+          gcs_content_uri: "gs://bucket/path.ext", type: :PLAIN_TEXT, language: "en")
+        features = Google::Cloud::Language::V1beta1::AnnotateTextRequest::Features.new(
+          extract_syntax: true, extract_entities: true, extract_document_sentiment: true)
+        grpc_resp = Google::Cloud::Language::V1beta1::AnnotateTextResponse.decode_json text_json
+
+        mock = Minitest::Mock.new
+        mock.expect :annotate_text, grpc_resp, [grpc_doc, features, :UTF8]
+
+        language.service.mocked_service = mock
+        gcs_fake = OpenStruct.new to_gs_url: "gs://bucket/path.ext"
+        doc = language.text gcs_fake, language: :en # go through #text, not #document
+        annotation = language.annotate doc
+        mock.verify
+
+        assert_text_annotation annotation
+      end
+    end
+  end
+
+  def assert_text_annotation annotation # expectations shared by the specs above
+    annotation.language.must_equal "en"
+
+    annotation.sentiment.language.must_equal "en"
+    annotation.sentiment.polarity.must_equal 1.0
+    annotation.sentiment.magnitude.must_equal 2.0999999046325684
+
+    annotation.entities.count.must_equal 3
+    annotation.entities.language.must_equal "en"
+    annotation.entities.unknown.map(&:name).must_equal []
+    annotation.entities.people.map(&:name).must_equal ["Chris", "Mike"]
+    annotation.entities.locations.map(&:name).must_equal ["Utah"]
+    annotation.entities.places.map(&:name).must_equal ["Utah"]
+    annotation.entities.organizations.map(&:name).must_equal []
+    annotation.entities.events.map(&:name).must_equal []
+    annotation.entities.artwork.map(&:name).must_equal []
+    annotation.entities.goods.map(&:name).must_equal []
+    annotation.entities.other.map(&:name).must_equal []
+
+    annotation.sentences.map(&:text).must_equal text_sentences
+    annotation.tokens.count.must_equal 24
+    token = annotation.tokens.first
+    token.text.must_equal "Hello"
+    token.part_of_speech.must_equal :X
+    token.head_token_index.must_equal 0
+    token.label.must_equal :ROOT
+    token.lemma.must_equal "Hello"
+  end
+end
diff --git a/google-cloud-language/test/google/cloud/language/project_test.rb b/google-cloud-language/test/google/cloud/language/project_test.rb
new file mode 100644
index 000000000000..ce7cc1c0d404
--- /dev/null
+++ b/google-cloud-language/test/google/cloud/language/project_test.rb
@@ -0,0 +1,22 @@
+# Copyright 2016 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require "helper"
+
+describe Google::Cloud::Language::Project, :mock_language do
+  it "knows the project identifier" do
+    language.must_be_kind_of Google::Cloud::Language::Project
+    language.project.must_equal project
+  end
+end