Skip to content

Commit

Permalink
black and flake
Browse files Browse the repository at this point in the history
  • Loading branch information
interrogator committed May 27, 2020
1 parent a622106 commit cbf4f8b
Show file tree
Hide file tree
Showing 19 changed files with 134 additions and 107 deletions.
16 changes: 8 additions & 8 deletions buzzword/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,14 +133,14 @@
MARTOR_MARKDOWN_EXTENSIONS = [
"markdown.extensions.extra",
"markdown.extensions.nl2br",
#'markdown.extensions.smarty',
#'markdown.extensions.fenced_code',
# Custom markdown extensions.
#'martor.extensions.urlize',
#'martor.extensions.del_ins', # ~~strikethrough~~ and ++underscores++
#'martor.extensions.mention', # to parse markdown mention
#'martor.extensions.emoji', # to parse markdown emoji
#'martor.extensions.mdx_video', # to parse embed/iframe video
# 'markdown.extensions.smarty',
# 'markdown.extensions.fenced_code',
# Custom markdown extensions.
# 'martor.extensions.urlize',
# 'martor.extensions.del_ins', # ~~strikethrough~~ and ++underscores++
# 'martor.extensions.mention', # to parse markdown mention
# 'martor.extensions.emoji', # to parse markdown emoji
# 'martor.extensions.mdx_video', # to parse embed/iframe video
]

# CSRF_COOKIE_HTTPONLY = False
Expand Down
1 change: 0 additions & 1 deletion compare/forms.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from django import forms

from martor.fields import MartorFormField
from .models import Post


Expand Down
20 changes: 14 additions & 6 deletions compare/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ def _get_ocr_engine(lang):
"""
tools = pyocr.get_available_tools()
tool = tools[0]
langs = tool.get_available_languages()
lang = langs[0]
# langs = tool.get_available_languages()
# lang = langs[0]
return tool, "deu_frak2"


Expand All @@ -44,22 +44,30 @@ def load_tif_pdf_plaintext(corpus):
image = Image.open(tif_path)
image.save(pdf_path)

PDF.objects.get(slug=corpus.slug, num=i)

# todo: use get_or_create
pdf = PDF(name=name, num=i, path=pdf_path, slug=corpus.slug)
tif = TIF(name=name, num=i, path=tif_path, slug=corpus.slug)
pdf = PDF.objects.get_or_create(
name=name, num=i, path=pdf_path, slug=corpus.slug
)
tif = TIF.objects.get_or_create(
name=name, num=i, path=tif_path, slug=corpus.slug
)

try:
pdf.save()
tif.save()

print(f"({i+1}/{tot}) Storing PDF/TIF in DB: {pdf.path}")

# if there is already an OCRUpdate for this PDF, not much left to do
try:
exists = OCRUpdate.objects.get(pdf=pdf)
OCRUpdate.objects.get(pdf=pdf)
continue
except ObjectDoesNotExist:
pass

# there is no OCRUpdate for this code; therefore we need to build and save it
# there is no OCRUpdate for this PDF; therefore we build and save it
plaintext = ocr_engine.image_to_string(
Image.open(tif_path),
lang=lang_chosen,
Expand Down
19 changes: 13 additions & 6 deletions compare/migrations/0001_initial.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,23 @@ class Migration(migrations.Migration):

initial = True

dependencies = [
]
dependencies = []

operations = [
migrations.CreateModel(
name='OCRText',
name="OCRText",
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('content', martor.models.MartorField()),
('commit_msg', models.CharField(blank=True, max_length=255)),
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("content", martor.models.MartorField()),
("commit_msg", models.CharField(blank=True, max_length=255)),
],
),
]
24 changes: 15 additions & 9 deletions compare/migrations/0002_auto_20200511_1717.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,26 @@
class Migration(migrations.Migration):

dependencies = [
('compare', '0001_initial'),
("compare", "0001_initial"),
]

operations = [
migrations.CreateModel(
name='Post',
name="Post",
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('title', models.CharField(max_length=200)),
('description', martor.models.MartorField()),
('wiki', martor.models.MartorField()),
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("title", models.CharField(max_length=200)),
("description", martor.models.MartorField()),
("wiki", martor.models.MartorField()),
],
),
migrations.DeleteModel(
name='OCRText',
),
migrations.DeleteModel(name="OCRText",),
]
22 changes: 15 additions & 7 deletions compare/migrations/0003_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,26 @@
class Migration(migrations.Migration):

dependencies = [
('compare', '0002_auto_20200511_1717'),
("compare", "0002_auto_20200511_1717"),
]

operations = [
migrations.CreateModel(
name='PDF',
name="PDF",
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('slug', models.SlugField(max_length=255)),
('path', models.TextField()),
('name', models.CharField(max_length=200)),
('num', models.IntegerField()),
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("slug", models.SlugField(max_length=255)),
("path", models.TextField()),
("name", models.CharField(max_length=200)),
("num", models.IntegerField()),
],
),
]
7 changes: 2 additions & 5 deletions compare/migrations/0004_auto_20200525_1446.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,9 @@
class Migration(migrations.Migration):

dependencies = [
('compare', '0003_pdf'),
("compare", "0003_pdf"),
]

operations = [
migrations.AlterUniqueTogether(
name='pdf',
unique_together={('slug', 'num')},
),
migrations.AlterUniqueTogether(name="pdf", unique_together={("slug", "num")},),
]
47 changes: 26 additions & 21 deletions compare/migrations/0005_auto_20200525_1902.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,40 @@
class Migration(migrations.Migration):

dependencies = [
('compare', '0004_auto_20200525_1446'),
("compare", "0004_auto_20200525_1446"),
]

operations = [
migrations.RemoveField(
model_name='post',
name='title',
),
migrations.RemoveField(
model_name='post',
name='wiki',
),
migrations.RemoveField(model_name="post", name="title",),
migrations.RemoveField(model_name="post", name="wiki",),
migrations.AddField(
model_name='post',
name='commit_msg',
model_name="post",
name="commit_msg",
field=models.CharField(blank=True, max_length=200),
),
migrations.CreateModel(
name='OCRUpdate',
name="OCRUpdate",
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('slug', models.SlugField(max_length=255)),
('commit_msg', models.CharField(blank=True, max_length=200)),
('timestamp', models.DateTimeField(auto_now_add=True)),
('text', models.TextField()),
('pdf', models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, to='compare.PDF')),
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("slug", models.SlugField(max_length=255)),
("commit_msg", models.CharField(blank=True, max_length=200)),
("timestamp", models.DateTimeField(auto_now_add=True)),
("text", models.TextField()),
(
"pdf",
models.ForeignKey(
on_delete=django.db.models.deletion.PROTECT, to="compare.PDF"
),
),
],
options={
'unique_together': {('slug', 'timestamp')},
},
options={"unique_together": {("slug", "timestamp")}},
),
]
5 changes: 2 additions & 3 deletions compare/migrations/0006_auto_20200525_1944.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@
class Migration(migrations.Migration):

dependencies = [
('compare', '0005_auto_20200525_1902'),
("compare", "0005_auto_20200525_1902"),
]

operations = [
migrations.AlterUniqueTogether(
name='ocrupdate',
unique_together={('slug', 'timestamp', 'pdf')},
name="ocrupdate", unique_together={("slug", "timestamp", "pdf")},
),
]
6 changes: 2 additions & 4 deletions compare/migrations/0007_auto_20200526_1300.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,11 @@
class Migration(migrations.Migration):

dependencies = [
('compare', '0006_auto_20200525_1944'),
("compare", "0006_auto_20200525_1944"),
]

operations = [
migrations.AlterField(
model_name='ocrupdate',
name='text',
field=models.TextField(blank=True),
model_name="ocrupdate", name="text", field=models.TextField(blank=True),
),
]
10 changes: 4 additions & 6 deletions compare/migrations/0008_auto_20200526_1311.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,16 @@
class Migration(migrations.Migration):

dependencies = [
('compare', '0007_auto_20200526_1300'),
("compare", "0007_auto_20200526_1300"),
]

operations = [
migrations.AlterField(
model_name='ocrupdate',
name='text',
field=models.TextField(),
model_name="ocrupdate", name="text", field=models.TextField(),
),
migrations.AlterField(
model_name='post',
name='description',
model_name="post",
name="description",
field=martor.models.MartorField(blank=True),
),
]
26 changes: 16 additions & 10 deletions compare/migrations/0009_tif.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,27 @@
class Migration(migrations.Migration):

dependencies = [
('compare', '0008_auto_20200526_1311'),
("compare", "0008_auto_20200526_1311"),
]

operations = [
migrations.CreateModel(
name='TIF',
name="TIF",
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('slug', models.SlugField(max_length=255)),
('path', models.TextField()),
('name', models.CharField(max_length=200)),
('num', models.IntegerField()),
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("slug", models.SlugField(max_length=255)),
("path", models.TextField()),
("name", models.CharField(max_length=200)),
("num", models.IntegerField()),
],
options={
'unique_together': {('slug', 'num')},
},
options={"unique_together": {("slug", "num")}},
),
]
2 changes: 1 addition & 1 deletion compare/tests.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from django.test import TestCase
# from django.test import TestCase

# Create your tests here.
5 changes: 3 additions & 2 deletions compare/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@

from buzz import Corpus as BuzzCorpus
from explore.models import Corpus
from .models import OCRUpdate
from django.core.exceptions import ObjectDoesNotExist
from .models import OCRUpdate, PDF

# from django.core.exceptions import ObjectDoesNotExist


def markdown_to_buzz_input(markdown):
Expand Down

0 comments on commit cbf4f8b

Please sign in to comment.