Skip to content

Commit

Permalink
Merge pull request #1 from arthurhenrique/test-perf
Browse files Browse the repository at this point in the history
chore(*): performance test
  • Loading branch information
arthurhenrique committed Nov 28, 2020
2 parents 7d55569 + 8b16690 commit 5dc15a2
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 35 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM golang:1.15.3-alpine3.12
FROM golang:1.15.5-alpine3.12
RUN apk add --update --no-cache --virtual wget-dependencies \
ca-certificates \
openssl \
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

POC to test the Tesseract library with the Portuguese and English languages.

Edit: now Portuguese only, for performance:
[test perf](https://github.com/arthurhenrique/go-ocr/pull/1)

## Notes

This works properly with images whose resolution is between 300 and 600 dpi (inclusive).
Expand Down
5 changes: 1 addition & 4 deletions convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,11 @@ package main

import (
"bufio"
"bytes"
"fmt"
"image"
"image/color"
"image/jpeg"
"os"
)

// convertToBytes returns bytes.
func convertToBytes(fileName string) []byte {
file, err := os.Open(fileName)
if err != nil {
Expand Down
2 changes: 0 additions & 2 deletions detection.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@ package main

import (
"strings"
"sync"
"unicode"

"github.com/otiai10/gosseract/v2"
)

var once sync.Once
var instance *gosseract.Client

// TextMethod contains the configuration of the tesseract client.
Expand Down
Binary file added files/bode.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
72 changes: 44 additions & 28 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,54 @@ package main

import (
"fmt"
"log"
"time"

"github.com/otiai10/gosseract/v2"
)

func main() {

fileName := "files/2.jpg"

instance = gosseract.NewClient()

tm := TextMethod{
Name: "tesseract",
Language: "por+eng",
Variables: map[string]string{
"tessedit_pageseg_mode": "3", // auto page segmentation mode
"load_system_dawg": "0", // removing dict to increase recognition
"load_freq_dawg": "0",
},
Client: instance,
}
defer instance.Close()

// Handle tesseract's settings
tm.tesseractSettings()

bytesImage := convertToBytes(fileName)
object, err := tm.extract(bytesImage)
if err != nil {
return
}

if object != nil {
fmt.Print(*object)
iterations := 50
total := time.Duration(0)

for i := 0; i < iterations; i++ {
start := time.Now()

fileName := "files/bode.jpg"

// fast model
// https://github.com/tesseract-ocr/tessdata_fast/raw/master/por.traineddata
instance = gosseract.NewClient()

tm := TextMethod{
Name: "tesseract",
Language: "por",
Variables: map[string]string{
"tessedit_pageseg_mode": "3", // auto page segmentation mode
"load_system_dawg": "0", // removing dict to increase recognition
"load_freq_dawg": "0",
},
Client: instance,
}
defer instance.Close()

// Handle tesseract's settings
tm.tesseractSettings()

bytesImage := convertToBytes(fileName)
object, err := tm.extract(bytesImage)
if err != nil {
return
}

if object != nil {
fmt.Println(*object)
}
elapsed := time.Since(start)
log.Printf("took %s", elapsed)
total += elapsed
}
log.Printf("All Duration: %s", total)
log.Printf("loops: %d", iterations)
log.Printf("Average: %s", total/time.Duration(iterations))
}

0 comments on commit 5dc15a2

Please sign in to comment.