Skip to content

Commit

Permalink
Merge 5d69475 into 5642703
Browse files Browse the repository at this point in the history
  • Loading branch information
dannnylo committed Apr 4, 2020
2 parents 5642703 + 5d69475 commit b4c7d82
Show file tree
Hide file tree
Showing 8 changed files with 165 additions and 72 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,32 @@
# Continuous integration: runs the spec suite on every push, once per Ruby
# version listed in the matrix.
name: CI
on: [push]
jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One job is started for each entry below.
        ruby:
          - '2.5.x'
          - '2.6.x'
          - '2.7.x'
    steps:
      - uses: actions/checkout@v2
      # The specs invoke the `tesseract` command, so the OCR engine and its
      # English language data are installed from the alex-p PPA, together
      # with ghostscript.
      - name: Install tesseract-ocr
        run: |
          sudo add-apt-repository ppa:alex-p/tesseract-ocr -y
          sudo apt-get update -q
          sudo apt-get install tesseract-ocr tesseract-ocr-eng ghostscript -y
      - name: Setup Ruby
        uses: actions/setup-ruby@v1
        with:
          ruby-version: ${{ matrix.ruby }}
      # Remove every installed bundler version, install a fresh one, then
      # resolve the gem dependencies (4 parallel jobs, up to 3 retries).
      # No per-step env is set: the matrix only defines `ruby`, so there is
      # no `matrix.rails` value to forward.
      - name: Bundle
        run: |
          gem uninstall -aIx bundler
          gem install bundler
          bundle install --jobs 4 --retry 3
      - name: Test
        run: bundle exec rake
12 changes: 7 additions & 5 deletions spec/rtesseract/box_spec.rb
Expand Up @@ -3,12 +3,14 @@
RSpec.describe RTesseract::Box do
let(:path) { Pathname.new(File.dirname(__FILE__)).join('..') }
let(:words_image) { path.join('resources', 'test_words.png').to_s }
let(:words) { ['If', 'you', 'are', 'a', 'friend,', 'you', 'speak', 'the', 'password,', 'and', 'the', 'doors', 'will', 'open.'] }
let(:instance) { RTesseract.new(words_image) }

it 'bounding box' do
expect(RTesseract.new(words_image).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\f")
expect(RTesseract.new(words_image).to_box).to include(word: 'you', x_start: 69, y_start: 17, x_end: 100, y_end: 31)
it 'returns the list of words' do
expect(instance.words).to eql(words)
end

words = ['If', 'you', 'are', 'a', 'friend,', 'you', 'speak', 'the', 'password,', 'and', 'the', 'doors', 'will', 'open.']
expect(RTesseract.new(words_image).words).to eql(words)
it 'bounding box' do
expect(instance.to_box).to include(word: 'you', x_start: 69, y_start: 17, x_end: 100, y_end: 31)
end
end
41 changes: 31 additions & 10 deletions spec/rtesseract/configuration_spec.rb
Expand Up @@ -3,18 +3,39 @@
RSpec.describe RTesseract do
let(:path) { Pathname.new(File.dirname(__FILE__)).join('..') }

it ' support default config' do
RTesseract.configure { |config| config.psm = 7 }
context 'with global spm' do
before { described_class.configure { |config| config.psm = 7 } }

expect(RTesseract.config.psm).to eql(7)
expect(RTesseract.new(path, psm: 2).config.psm).to eql(2)
it 'gets the global psm value' do
expect(described_class.config.psm).to be(7)
end

expect(RTesseract.config.command).to eql('tesseract')
expect(RTesseract.new(path, command: '/usr/bin/tesseract4').config.command).to eql('/usr/bin/tesseract4')
it 'gets instance psm value' do
expect(described_class.new(path, psm: 2).config.psm).to be(2)
end
end

context 'with default command' do
it 'gets the global psm value' do
expect(described_class.config.command).to eql('tesseract')
end

it 'gets instance command value' do
expect(described_class.new(path, command: '/usr/bin/tesseract4').config.command).to eql('/usr/bin/tesseract4')
end
end

context 'with other options' do
it 'allows to setup oem' do
expect(described_class.new(path, oem: 1).config.oem).to be(1)
end

it 'allows to setup lang' do
expect(described_class.new(path, lang: 'eng').config.lang).to eql('eng')
end

expect(RTesseract.new(path, psm: 2).config.psm).to eql(2)
expect(RTesseract.new(path, oem: 1).config.oem).to eql(1)
expect(RTesseract.new(path, lang: 'eng').config.lang).to eql('eng')
expect(RTesseract.new(path, lang: 'eng+por').config.lang).to eql('eng+por')
it 'allows to setup multiple langs' do
expect(described_class.new(path, lang: 'eng+por').config.lang).to eql('eng+por')
end
end
end
20 changes: 11 additions & 9 deletions spec/rtesseract/pdf_spec.rb
Expand Up @@ -2,17 +2,19 @@

RSpec.describe RTesseract::Pdf do
let(:path) { Pathname.new(File.dirname(__FILE__)).join('..') }
let(:words_image) { path.join('resources', 'test-pdf.png').to_s }
let(:file) { RTesseract.new(words_image).to_pdf }

let(:image_pdf_path) { path.join('resources', 'test-pdf.png').to_s }

it ' support pdf output mode' do
pdf_ocr = RTesseract.new(image_pdf_path).to_pdf

expect(File.extname(pdf_ocr.path)).to eql('.pdf')
expect(File.exist?(pdf_ocr.path)).to be_truthy
after do
file.close
File.unlink(file)
end

pdf_ocr.close
it 'returns a file with extension .pdf' do
expect(File.extname(file.path)).to eql('.pdf')
end

File.unlink(pdf_ocr)
it 'checks if file pdf exisits' do
expect(File).to exist(file.path)
end
end
37 changes: 37 additions & 0 deletions spec/rtesseract/text_spec.rb
@@ -0,0 +1,37 @@
# frozen_string_literal: true

# Specs for plain-text OCR output. Each example runs RTesseract against an
# image fixture in the `resources` directory one level above this file and
# compares the recognised text with the expected string.
RSpec.describe RTesseract::Text do
  let(:path) { Pathname.new(File.dirname(__FILE__)).join('..') }
  let(:image_path) { path.join('resources', 'test.tif').to_s }
  let(:words_image) { path.join('resources', 'test_words.png').to_s }

  it 'translate image to text' do
    expect(RTesseract.new(image_path).to_s_without_spaces).to eql('43XF')
  end

  it 'translate tif image to text' do
    expect(RTesseract.new(path.join('resources', 'test1.tif').to_s).to_s_without_spaces).to eql('V2V4')
  end

  # The fixture's file name contains spaces; the expected text does not.
  it 'translate tif image with spaces to text' do
    expect(RTesseract.new(path.join('resources', 'test with spaces.tif').to_s).to_s_without_spaces).to eql('V2V4')
  end

  it 'translate png image to text' do
    expect(RTesseract.new(path.join('resources', 'test.png').to_s).to_s_without_spaces).to eql('HW9W')
  end

  it 'translate jpg image to text' do
    expect(RTesseract.new(path.join('resources', 'test.jpg').to_s).to_s_without_spaces).to eql('3R8F')
  end

  it 'translate image to text with options' do
    expect(RTesseract.new(image_path, psm: 7, oem: 1).to_s_without_spaces).to eql('43XF')
  end

  # `to_s` keeps line breaks and the trailing form feed, unlike
  # `to_s_without_spaces` used in the examples above.
  it 'tests output text' do
    expect(RTesseract.new(words_image).to_s).to eql("If you are a friend,\nyou speak the password,\nand the doors will open.\n\f")
  end
end
18 changes: 10 additions & 8 deletions spec/rtesseract/tsv_spec.rb
@@ -1,18 +1,20 @@
# frozen_string_literal: true

require 'csv'

RSpec.describe RTesseract::Tsv do
let(:path) { Pathname.new(File.dirname(__FILE__)).join('..') }
let(:words_image) { path.join('resources', 'test_words.png').to_s }
let(:file) { RTesseract.new(words_image).to_tsv }

it ' support tsv output mode' do
tsv_ocr = RTesseract.new(words_image).to_tsv
after do
file.close
File.unlink(file)
end

expect(File.extname(tsv_ocr.path)).to eql('.tsv')
expect(tsv_ocr.read).to include('level page_num block_num par_num line_num word_num left top width height conf text')
it 'returns a file with extension .tsv' do
expect(File.extname(file.path)).to eql('.tsv')
end

tsv_ocr.close
File.unlink(tsv_ocr)
it ' support tsv output mode' do
expect(file.read).to include('level page_num block_num par_num line_num word_num left top width height conf text')
end
end
75 changes: 36 additions & 39 deletions spec/rtesseract_spec.rb
Expand Up @@ -5,60 +5,57 @@
let(:image_path) { path.join('resources', 'test.tif').to_s }
let(:pdf_path) { path.join('resources', 'test.tif').to_s }

it 'has a version number' do
expect(RTesseract::VERSION).not_to be nil

expect(RTesseract.tesseract_version).to be > 3.05
it 'returns the tesseract version' do
expect(described_class.tesseract_version).to be > 3.05
end

it 'be instantiable' do
expect(RTesseract.new.class).to eql(RTesseract)
expect(RTesseract.new('').class).to eql(RTesseract)
expect(RTesseract.new(image_path).class).to eql(RTesseract)
it 'be instantiable without path' do
expect(described_class.new.class).to eql(described_class)
end

it 'translate image to text' do
expect(RTesseract.new(image_path).to_s_without_spaces).to eql('43XF')
{
'test1.tif' => 'V2V4',
'test with spaces.tif' => 'V2V4',
'test.png' => 'HW9W',
'test.jpg' => '3R8F'
}.each do |file, value|
expect(RTesseract.new(path.join('resources', file).to_s).to_s_without_spaces).to eql(value)
end
it 'be instantiable with blank string' do
expect(described_class.new('').class).to eql(described_class)
end

it 'translate image to text with options' do
expect(RTesseract.new(image_path, psm: 7, oem: 1).to_s_without_spaces).to eql('43XF')
it 'be instantiable with a path' do
expect(described_class.new(image_path).class).to eql(described_class)
end

it 'get tesseract version' do
expect(RTesseract.tesseract_version).to be > 0
context 'when tesseract not installed' do
before do
described_class.configure { |config| config.command = 'tesseract_not_installed' }
end

RTesseract.configure { |config| config.command = 'tesseract_not_installed' }
it 'returns zero on #tesseract_version' do
expect(described_class.tesseract_version).to be(0)
end

expect(RTesseract.tesseract_version).to eql(0)
it 'raise a error if tesseract version < 3.05' do
expect { described_class.check_version! }.to raise_error(RTesseract::Error)
end
end

it 'raise a error if tesseract version < 3.05' do
RTesseract.configure { |config| config.command = 'tesseract_not_installed' }
context 'without source' do
let(:instance) { described_class.new }

expect { RTesseract.check_version! }.to raise_error(RTesseract::Error)
end

it 'raise a error when tesseract raise a error' do
expect { RTesseract.new.to_s }.to raise_error(RTesseract::Error)
it 'raise an exception' do
aggregate_failures 'raise an exception' do
expect { instance.to_s }.to raise_error(RTesseract::Error)
expect(instance.errors.first).to include('Error during processing')
end
end
end

it 'store the error on a variable to debug' do
instance = RTesseract.new
expect { instance.to_s }.to raise_error(RTesseract::Error)
expect(instance.errors.first).to include('Error during processing')

error_intance = RTesseract.new(path.join('resources', 'image_with_error.png').to_s)
context 'with errors on image' do
let(:error_intance) do
described_class.new(path.join('resources', 'image_with_error.png').to_s)
end

expect(error_intance.to_s_without_spaces).to eql('RTX-0003-03-02-01PRE')
expect(error_intance.errors).to eql(["Error in boxClipToRectangle: box outside rectangle\nError in pixScanForForeground: invalid box\n"])
it 'stores the error on a variable to debug' do
aggregate_failures 'stores the error on a variable to debug' do
expect(error_intance.to_s_without_spaces).to eql('RTX-0003-03-02-01PRE')
expect(error_intance.errors).to eql(["Error in boxClipToRectangle: box outside rectangle\nError in pixScanForForeground: invalid box\n"])
end
end
end
end
2 changes: 1 addition & 1 deletion spec/spec_helper.rb
Expand Up @@ -20,7 +20,7 @@
c.syntax = :expect
end

config.before(:each) do
config.before do
RTesseract.reset_config!
end
end

0 comments on commit b4c7d82

Please sign in to comment.