Skip to content

Commit

Permalink
Merge pull request #223 from CatalystCode/enhancement/data-import-coverage
Browse files Browse the repository at this point in the history

Enhancement/Ensure data pagination is covered in tests
  • Loading branch information
Hironsan committed Jun 11, 2019
2 parents d5514ab + e1ae68c commit 427f59b
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 13 deletions.
2 changes: 1 addition & 1 deletion app/app/settings.py
Expand Up @@ -257,7 +257,7 @@

# Size of the batch for creating documents
# on the import phase
IMPORT_BATCH_SIZE = 500
IMPORT_BATCH_SIZE = env.int('IMPORT_BATCH_SIZE', 500)

GOOGLE_TRACKING_ID = env('GOOGLE_TRACKING_ID', 'UA-125643874-2')

Expand Down
1 change: 1 addition & 0 deletions app/server/tests/test_api.py
Expand Up @@ -904,6 +904,7 @@ def test_no_cloud_upload(self):
self.assertFalse(response.json().get('cloud_upload'))


@override_settings(IMPORT_BATCH_SIZE=2)
class TestParser(APITestCase):

def parser_helper(self, filename, parser, include_label=True):
Expand Down
18 changes: 6 additions & 12 deletions app/server/utils.py
Expand Up @@ -7,10 +7,10 @@
from random import Random

from django.db import transaction
from django.conf import settings
from rest_framework.renderers import JSONRenderer
from seqeval.metrics.sequence_labeling import get_entities

from app.settings import IMPORT_BATCH_SIZE
from .exceptions import FileParseException
from .models import Label
from .serializers import DocumentSerializer, LabelSerializer
Expand Down Expand Up @@ -242,19 +242,13 @@ class CoNLLParser(FileParser):
```
"""
def parse(self, file):
"""Store json for seq2seq.
Return format:
{"text": "Python is awesome!", "labels": [[0, 6, "Product"],]}
...
"""
words, tags = [], []
data = []
file = io.TextIOWrapper(file, encoding='utf-8')
for i, line in enumerate(file, start=1):
if len(data) >= IMPORT_BATCH_SIZE:
if len(data) >= settings.IMPORT_BATCH_SIZE:
yield data
data = []
line = line.decode('utf-8')
line = line.strip()
if line:
try:
Expand Down Expand Up @@ -301,7 +295,7 @@ class PlainTextParser(FileParser):
def parse(self, file):
file = io.TextIOWrapper(file, encoding='utf-8')
while True:
batch = list(itertools.islice(file, IMPORT_BATCH_SIZE))
batch = list(itertools.islice(file, settings.IMPORT_BATCH_SIZE))
if not batch:
break
yield [{'text': line.strip()} for line in batch]
Expand All @@ -327,7 +321,7 @@ def parse(self, file):
columns = next(reader)
data = []
for i, row in enumerate(reader, start=2):
if len(data) >= IMPORT_BATCH_SIZE:
if len(data) >= settings.IMPORT_BATCH_SIZE:
yield data
data = []
if len(row) == len(columns) and len(row) >= 2:
Expand All @@ -347,7 +341,7 @@ def parse(self, file):
file = io.TextIOWrapper(file, encoding='utf-8')
data = []
for i, line in enumerate(file, start=1):
if len(data) >= IMPORT_BATCH_SIZE:
if len(data) >= settings.IMPORT_BATCH_SIZE:
yield data
data = []
try:
Expand Down

0 comments on commit 427f59b

Please sign in to comment.