
Switch URLs from MediaFire to GitHub Releases.
faustomorales committed Sep 13, 2020
1 parent 6b11251 commit c8a4137
Showing 8 changed files with 45 additions and 30 deletions.
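The download pattern on the library side is unchanged: `keras_ocr.tools.download_and_verify` fetches a file into the cache directory and checks its SHA-256; only the URLs now point at GitHub Releases instead of MediaFire. A minimal sketch of the new pattern, using the URL, filename, and checksum that appear in the diff below (it assumes `keras-ocr` is installed and the release asset is reachable):

```python
import keras_ocr

# Fetch the CRAFT detector weights from the new GitHub Releases URL and verify
# the checksum; the values below are copied from the detection.py diff in this commit.
weights_path = keras_ocr.tools.download_and_verify(
    url='https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/craft_mlt_25k.h5',
    filename='craft_mlt_25k.h5',
    sha256='7283ce2ff05a0617e9740c316175ff3bacdd7215dbdf1a726890d5099431f899')
print(weights_path)  # resolves inside the default cache directory, ~/.keras-ocr
```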
4 changes: 2 additions & 2 deletions Dockerfile
@@ -4,8 +4,8 @@ WORKDIR /usr/src
# change.
RUN mkdir -p /root/.keras-ocr && ( \
cd /root/.keras-ocr && \
curl -L -o craft_mlt_25k.h5 https://www.mediafire.com/file/mepzf3sq7u7nve9/craft_mlt_25k.h5/file && \
curl -L -o crnn_kurapan.h5 https://www.mediafire.com/file/pkj2p29b1f6fpil/crnn_kurapan.h5/file \
curl -L -o craft_mlt_25k.h5 https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/craft_mlt_25k.h5 && \
curl -L -o crnn_kurapan.h5 https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/crnn_kurapan.h5 \
)
COPY ./Pipfile* ./
COPY ./Makefile ./
8 changes: 4 additions & 4 deletions README.md
@@ -61,10 +61,10 @@ You may be wondering how the models in this package compare to existing cloud OC

| model | latency | precision | recall |
|-----------------------|---------|-----------|--------|
| [AWS](https://www.mediafire.com/file/7obsgyzg7z1ltb0/aws_annotations.json/file) | 719ms | 0.45 | 0.48 |
| [GCP](https://www.mediafire.com/file/8is5pq161ui95ox/google_annotations.json/file) | 388ms | 0.53 | 0.58 |
| [keras-ocr](https://www.mediafire.com/file/1gcwtrzy537v0sn/keras_ocr_annotations_scale_2.json/file) (scale=2) | 417ms | 0.53 | 0.54 |
| [keras-ocr](https://www.mediafire.com/file/dc7e66oupelsp7p/keras_ocr_annotations_scale_3.json/file) (scale=3) | 699ms | 0.5 | 0.59 |
| [AWS](https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/aws_annotations.json) | 719ms | 0.45 | 0.48 |
| [GCP](https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/google_annotations.json) | 388ms | 0.53 | 0.58 |
| [keras-ocr](https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/keras_ocr_annotations_scale_2.json) (scale=2) | 417ms | 0.53 | 0.54 |
| [keras-ocr](https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/keras_ocr_annotations_scale_3.json) (scale=3) | 699ms | 0.5 | 0.59 |

- Precision and recall were computed based on an intersection over union of 50% or higher and a text similarity to ground truth of 50% or higher.
- `keras-ocr` latency values were computed using a Tesla P4 GPU on Google Colab. `scale` refers to the argument provided to `keras_ocr.pipelines.Pipeline()` which determines the upscaling applied to the image prior to inference.
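
For reference, a hedged sketch of how the `scale` setting above maps onto pipeline construction; it assumes the pipeline class is exposed as `keras_ocr.pipeline.Pipeline` with a `scale` keyword (as in current keras-ocr releases), and the image path is hypothetical:

```python
import keras_ocr

# Build a pipeline that upscales images 2x before detection, matching the
# scale=2 row in the table above; pretrained weights download on first use.
pipeline = keras_ocr.pipeline.Pipeline(scale=2)
images = [keras_ocr.tools.read('path/to/example.jpg')]  # hypothetical input image
predictions = pipeline.recognize(images)  # one list of (word, box) pairs per image
```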
10 changes: 5 additions & 5 deletions keras_ocr/data_generation.py
@@ -1,4 +1,4 @@
# pylint: disable=invalid-name,too-many-locals,too-many-arguments,too-many-branches,too-many-statements,stop-iteration-return
# pylint: disable=invalid-name,line-too-long,too-many-locals,too-many-arguments,too-many-branches,too-many-statements,stop-iteration-return
import os
import math
import glob
@@ -168,7 +168,7 @@ def get_backgrounds(cache_dir=None):
cache_dir = os.path.expanduser(os.path.join('~', '.keras-ocr'))
backgrounds_dir = os.path.join(cache_dir, 'backgrounds')
backgrounds_zip_path = tools.download_and_verify(
url='https://www.mediafire.com/file/l0pdx5j860kqmyr/backgrounds.zip/file',
url='https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/backgrounds.zip',
sha256='f263ed0d55de303185cc0f93e9fcb0b13104d68ed71af7aaaa8e8c91389db471',
filename='backgrounds.zip',
cache_dir=cache_dir)
@@ -199,7 +199,7 @@ def get_fonts(cache_dir=None,
if cache_dir is None:
cache_dir = os.path.expanduser(os.path.join('~', '.keras-ocr'))
fonts_zip_path = tools.download_and_verify(
url='https://www.mediafire.com/file/6v9r9oztyri0jrc/fonts.zip/file',
url='https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/fonts.zip',
sha256='d4d90c27a9bc4bf8fff1d2c0a00cfb174c7d5d10f60ed29d5f149ef04d45b700',
filename='fonts.zip',
cache_dir=cache_dir)
@@ -212,7 +212,8 @@ def get_fonts(cache_dir=None,
if exclude_smallcaps:
with open(
tools.download_and_verify(
url='https://www.mediafire.com/file/v2o7hxn0mapne7i/fonts_smallcaps.txt/file',
url=
'https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/fonts_smallcaps.txt',
sha256='6531c700523c687f02852087530d1ab3c7cc0b59891bbecc77726fbb0aabe68e',
filename='fonts_smallcaps.txt',
cache_dir=cache_dir), 'r') as f:
@@ -312,7 +313,6 @@ def draw_text_image(text,
box is an array of points with shape (4, 2) providing the coordinates
of the character box in clockwise order starting from the top left.
"""
# pylint: disable=bad-continuation
if not use_ligatures:
fonts = {
subalphabet: PIL.ImageFont.truetype(font_path, size=fontsize)
26 changes: 19 additions & 7 deletions keras_ocr/datasets.py
@@ -1,6 +1,7 @@
# pylint: disable=invalid-name,too-many-arguments,too-many-locals
# pylint: disable=line-too-long,invalid-name,too-many-arguments,too-many-locals
import concurrent
import itertools
import warnings
import zipfile
import random
import glob
@@ -120,7 +121,7 @@ def get_born_digital_recognizer_dataset(split='train', cache_dir=None):
train_dir = os.path.join(main_dir, 'train')
training_zip_path = tools.download_and_verify(
url=
'https://www.mediafire.com/file/ybj0uo196rushhn/Challenge1_Training_Task3_Images_GT.zip/file', # pylint: disable=line-too-long
'https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/Challenge1_Training_Task3_Images_GT.zip', # pylint: disable=line-too-long
filename='Challenge1_Training_Task3_Images_GT.zip',
cache_dir=main_dir,
sha256='8ede0639f5a8031d584afd98cee893d1c5275d7f17863afc2cba24b13c932b07')
@@ -136,15 +137,16 @@ def get_born_digital_recognizer_dataset(split='train', cache_dir=None):
test_dir = os.path.join(main_dir, 'test')
test_zip_path = tools.download_and_verify(
url=
'https://www.mediafire.com/file/nesckvjulvzpb2i/Challenge1_Test_Task3_Images.zip/file',
'https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/Challenge1_Test_Task3_Images.zip',
filename='Challenge1_Test_Task3_Images.zip',
cache_dir=main_dir,
sha256='8f781b0140fd0bac3750530f0924bce5db3341fd314a2fcbe9e0b6ca409a77f0')
if len(glob.glob(os.path.join(test_dir, '*.png'))) != 1439:
with zipfile.ZipFile(test_zip_path) as zfile:
zfile.extractall(test_dir)
test_gt_path = tools.download_and_verify(
url='https://www.mediafire.com/file/euuuwsgg7z4pcb2/Challenge1_Test_Task3_GT.txt/file',
url=
'https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/Challenge1_Test_Task3_GT.txt',
cache_dir=test_dir,
filename='Challenge1_Test_Task3_GT.txt',
sha256='fce7f1228b7c4c26a59f13f562085148acf063d6690ce51afc395e0a1aabf8be')
@@ -194,7 +196,7 @@ def get_icdar_2013_detector_dataset(cache_dir=None, skip_illegible=False):
training_images_dir = os.path.join(main_dir, 'Challenge2_Training_Task12_Images')
training_zip_images_path = tools.download_and_verify(
url=
'https://www.mediafire.com/file/l8ct7ckudg12ln6/Challenge2_Training_Task12_Images.zip/file', # pylint: disable=line-too-long
'https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/Challenge2_Training_Task12_Images.zip', # pylint: disable=line-too-long
cache_dir=main_dir,
filename='Challenge2_Training_Task12_Images.zip',
sha256='7a57d1699fbb92db3ad82c930202938562edaf72e1c422ddd923860d8ace8ded')
@@ -203,7 +205,8 @@ def get_icdar_2013_detector_dataset(cache_dir=None, skip_illegible=False):
zfile.extractall(training_images_dir)
training_gt_dir = os.path.join(main_dir, 'Challenge2_Training_Task2_GT')
training_zip_gt_path = tools.download_and_verify(
url='https://www.mediafire.com/file/rpfphmxvudn5v3y/Challenge2_Training_Task2_GT.zip/file', # pylint: disable=line-too-long
url=
'https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/Challenge2_Training_Task2_GT.zip', # pylint: disable=line-too-long
cache_dir=main_dir,
filename='Challenge2_Training_Task2_GT.zip',
sha256='4cedd5b1e33dc4354058f5967221ac85dbdf91a99b30f3ab1ecdf42786a9d027')
@@ -245,6 +248,14 @@ def get_icdar_2019_semisupervised_dataset(cache_dir=None):
Args:
cache_dir: The cache directory to use.
"""
warnings.warn(
"You may need to get this dataset manually in-browser by downloading "
"https://www.mediafire.com/file/snekaezeextc3ee/ImagesPart1.zip/file "
"and https://www.mediafire.com/file/i2snljkfm4t2ojm/ImagesPart2.zip/file "
"and putting them in ~/.keras-ocr/icdar2019. The files are too big "
"for GitHub Releases and we may run out of direct download bandwidth on "
"MediaFire where they are hosted. See "
"https://github.com/faustomorales/keras-ocr/issues/117 for more details.", UserWarning)
if cache_dir is None:
cache_dir = tools.get_default_cache_dir()
main_dir = os.path.join(cache_dir, 'icdar2019')
@@ -267,7 +278,8 @@ def get_icdar_2019_semisupervised_dataset(cache_dir=None):
with zipfile.ZipFile(training_zip_2) as zfile:
zfile.extractall(main_dir)
ground_truth = tools.download_and_verify(
url='http://www.mediafire.com/file/jshjv9kntxjzhva/mlt2019_dataset.json/file', # pylint: disable=line-too-long
url=
'https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/mlt2019_dataset.json', # pylint: disable=line-too-long
cache_dir=main_dir,
filename='mlt2019_dataset.json')
with open(ground_truth, 'r') as f:
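The warning added above describes a manual workaround for the ICDAR 2019 images. A hedged pre-flight check along those lines, using the directory and filenames from the warning text and the dataset loader shown in this diff:

```python
import os

import keras_ocr

# Confirm the manually downloaded ICDAR 2019 archives are where the warning
# says to put them (~/.keras-ocr/icdar2019) before building the dataset.
icdar_dir = os.path.expanduser(os.path.join('~', '.keras-ocr', 'icdar2019'))
for name in ['ImagesPart1.zip', 'ImagesPart2.zip']:
    path = os.path.join(icdar_dir, name)
    if not os.path.isfile(path):
        raise FileNotFoundError(f'Missing {path}; download it in-browser as described in the warning.')

dataset = keras_ocr.datasets.get_icdar_2019_semisupervised_dataset()
```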
14 changes: 8 additions & 6 deletions keras_ocr/detection.py
@@ -1,4 +1,4 @@
# pylint: disable=invalid-name,too-many-locals,no-else-raise,too-many-arguments,no-self-use,too-many-statements,stop-iteration-return,import-outside-toplevel
# pylint: disable=invalid-name,too-many-locals,line-too-long,no-else-raise,too-many-arguments,no-self-use,too-many-statements,stop-iteration-return,import-outside-toplevel
import typing

# The PyTorch portions of this code are subject to the following copyright notice.
@@ -426,7 +426,7 @@ def init_weights(modules):

class vgg16_bn(torch.nn.Module):
def __init__(self, pretrained=True, freeze=True):
super(vgg16_bn, self).__init__()
super().__init__()
# We don't bother loading the pretrained VGG
# because we're going to use the weights
# at weights_path.
@@ -481,7 +481,7 @@ def forward(self, X): # pylint: disable=arguments-differ

class double_conv(nn.Module):
def __init__(self, in_ch, mid_ch, out_ch):
super(double_conv, self).__init__()
super().__init__()
self.conv = nn.Sequential(nn.Conv2d(in_ch + mid_ch, mid_ch, kernel_size=1),
nn.BatchNorm2d(mid_ch), nn.ReLU(inplace=True),
nn.Conv2d(mid_ch, out_ch, kernel_size=3, padding=1),
@@ -493,7 +493,7 @@ def forward(self, x): # pylint: disable=arguments-differ

class CRAFT(nn.Module):
def __init__(self, pretrained=False, freeze=False):
super(CRAFT, self).__init__()
super().__init__()
# Base network
self.basenet = vgg16_bn(pretrained, freeze)
# U network
@@ -567,12 +567,14 @@ def copyStateDict(state_dict):

PRETRAINED_WEIGHTS = {
('clovaai_general', True): {
'url': 'https://www.mediafire.com/file/qh2ullnnywi320s/craft_mlt_25k.pth/file',
'url':
'https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/craft_mlt_25k.pth',
'filename': 'craft_mlt_25k.pth',
'sha256': '4a5efbfb48b4081100544e75e1e2b57f8de3d84f213004b14b85fd4b3748db17'
},
('clovaai_general', False): {
'url': 'https://www.mediafire.com/file/mepzf3sq7u7nve9/craft_mlt_25k.h5/file',
'url':
'https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/craft_mlt_25k.h5',
'filename': 'craft_mlt_25k.h5',
'sha256': '7283ce2ff05a0617e9740c316175ff3bacdd7215dbdf1a726890d5099431f899'
}
8 changes: 5 additions & 3 deletions keras_ocr/recognition.py
@@ -1,4 +1,4 @@
# pylint: disable=invalid-name,too-many-locals,too-many-arguments
# pylint: disable=invalid-name,too-many-locals,too-many-arguments,line-too-long
import typing
import string

@@ -29,12 +29,14 @@
'build_params': DEFAULT_BUILD_PARAMS,
'weights': {
'notop': {
'url': 'https://www.mediafire.com/file/n9yfn5wueu82rgf/crnn_kurapan_notop.h5/file',
'url':
'https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/crnn_kurapan_notop.h5',
'filename': 'crnn_kurapan_notop.h5',
'sha256': '027fd2cced3cbea0c4f5894bb8e9e85bac04f11daf96b8fdcf1e4ee95dcf51b9'
},
'top': {
'url': 'https://www.mediafire.com/file/pkj2p29b1f6fpil/crnn_kurapan.h5/file',
'url':
'https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/crnn_kurapan.h5',
'filename': 'crnn_kurapan.h5',
'sha256': 'a7d8086ac8f5c3d6a0a828f7d6fbabcaf815415dd125c32533013f85603be46d'
}
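The weight tables updated above (detection and recognition) are what the default detector and recognizer pull down. A sketch of wiring them together explicitly; the constructor keyword values (`weights='clovaai_general'`, `weights='kurapan'`) are assumed from current keras-ocr releases rather than taken from this diff:

```python
import keras_ocr

# Assumed keyword values: 'clovaai_general' matches the PRETRAINED_WEIGHTS key
# in detection.py above; 'kurapan' is the presumed key for the recognition weights.
detector = keras_ocr.detection.Detector(weights='clovaai_general')
recognizer = keras_ocr.recognition.Recognizer(weights='kurapan')
pipeline = keras_ocr.pipeline.Pipeline(detector=detector, recognizer=recognizer)
```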
1 change: 0 additions & 1 deletion keras_ocr/tools.py
@@ -455,7 +455,6 @@ def download_and_verify(url, sha256=None, cache_dir=None, verbose=True, filename
return filepath


# pylint: disable=bad-continuation
def get_rotated_box(
points
) -> typing.Tuple[typing.Tuple[float, float], typing.Tuple[float, float], typing.Tuple[
4 changes: 2 additions & 2 deletions tests/test_pytorch_keras.py
@@ -17,11 +17,11 @@
def test_pytorch_identical_output():
import torch # pylint: disable=import-outside-toplevel
weights_path_torch = keras_ocr.tools.download_and_verify(
url='https://www.mediafire.com/file/qh2ullnnywi320s/craft_mlt_25k.pth/file',
url='https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/craft_mlt_25k.pth',
filename='craft_mlt_25k.pth',
sha256='4a5efbfb48b4081100544e75e1e2b57f8de3d84f213004b14b85fd4b3748db17')
weights_path_keras = keras_ocr.tools.download_and_verify(
url='https://www.mediafire.com/file/mepzf3sq7u7nve9/craft_mlt_25k.h5/file',
url='https://github.com/faustomorales/keras-ocr/releases/download/v0.8.4/craft_mlt_25k.h5',
filename='craft_mlt_25k.h5',
sha256='7283ce2ff05a0617e9740c316175ff3bacdd7215dbdf1a726890d5099431f899')

