Skip to content
This repository has been archived by the owner on May 14, 2021. It is now read-only.

Commit

Permalink
Merge pull request #18 from gini/text_support
Browse files Browse the repository at this point in the history
Basic implementation of the txt upload feature
  • Loading branch information
dkerwin committed May 11, 2015
2 parents c2d80cb + 62c58e5 commit c94c669
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 8 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ doc = api.upload(fh)
# => Gini::Api::Document
doc = api.upload('tmp/my_receipt.pdf', doctype_hint: 'Receipt')
# => Gini::Api::Document
doc = api.upload('This is a UTF-8 text message I would love to get extractions from', text: true)
# => Gini::Api::Document
doc = api.upload('/tmp/my_doc.txt')
# => Gini::Api::Document
doc.id
# => "123456789-abcd-ef12-000000000000"
doc.progress
Expand Down
30 changes: 22 additions & 8 deletions lib/gini-api/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -149,28 +149,38 @@ def request(verb, resource, options = {})
# Upload a document
#
# @param [String, IO] file Path or open filehandle of the document to upload, or the text itself when the :text option is set
# @param [String] doctype_hint Document type hint to optimize results or get incubator results
# @param [Float] interval Interval to poll progress
# @param [Hash] options Hash of available upload settings
# @option options [String] :doctype_hint Document type hint to optimize results or get incubator results
# @option options [Boolean] :text Treat the given string as the text to upload instead of a file path (default: false)
# @option options [Float] :interval Interval to poll progress
#
# @return [Gini::Api::Document] Return Gini::Api::Document object for uploaded document
#
# @example Upload and wait for completion
# doc = api.upload('/tmp/myfile.pdf')
# @example Upload with doctype hint
# doc = api.upload('/tmp/myfile.pdf', doctype_hint='Receipt')
# doc = api.upload('/tmp/myfile.pdf', doctype_hint: 'Receipt')
# @example Upload and monitor progress
# doc = api.upload('/tmp/myfile.pdf') { |d| puts "Progress: #{d.progress}" }
# @example Upload a text string instead of a file
# doc = api.upload('This is a text message I would love to get extractions from', text: true)
#
def upload(file, doctype_hint = nil, interval = 0.5, &block)
def upload(file, options = {}, &block)
opts = {
doctype_hint: nil,
text: false,
interval: 0.5
}.merge(options)

duration = Hash.new(0)

# Document upload
duration[:upload], response = upload_document(file, doctype_hint)
duration[:upload], response = upload_document(file, opts)

# Start polling (every opts[:interval] seconds, 0.5 by default) once the document has been uploaded successfully
if response.status == 201
doc = Gini::Api::Document.new(self, response.headers['location'])
duration[:processing] = poll_document(doc, interval, &block)
duration[:processing] = poll_document(doc, opts[:interval], &block)

duration[:total] = duration.values.inject(:+)
doc.duration = duration
Expand Down Expand Up @@ -319,13 +329,17 @@ def upload_connection
#
# @return [Faraday::Response] Response object from upload
#
def upload_document(file, doctype_hint)
def upload_document(file, opts)
response = nil

# Use StringIO on file string and force utf-8
file = StringIO.new(file.force_encoding('UTF-8')) if opts[:text]

duration = Benchmark.realtime do
response = upload_connection.post do |req|
req.options[:timeout] = @upload_timeout
req.url 'documents'
req.params[:doctype] = doctype_hint if doctype_hint
req.params[:doctype] = opts[:doctype_hint] if opts[:doctype_hint]
req.headers['Content-Type'] = 'multipart/form-data'
req.headers['Authorization'] = "Bearer #{@token.token}"
req.headers.merge!(version_header)
Expand Down

0 comments on commit c94c669

Please sign in to comment.