Skip to content

Commit

Permalink
Merge branch '0.1.0-dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
PonteIneptique committed Apr 28, 2020
2 parents ea8ce56 + fe01d6e commit dbb8ef7
Show file tree
Hide file tree
Showing 17 changed files with 84 additions and 640 deletions.
3 changes: 1 addition & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,4 @@ RUN apt-get update && apt-get install -y zip git
RUN pip3 install --upgrade pip setuptools

# Install Pie and Pie Webapp requirements
RUN pip3 install flask_pie==0.0.7 https://download.pytorch.org/whl/cpu/torch-1.1.0-cp36-cp36m-linux_x86_64.whl gunicorn

RUN pip3 install flask_pie==0.1.0 https://download.pytorch.org/whl/cpu/torch-1.1.0-cp36-cp36m-linux_x86_64.whl gunicorn
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,21 @@
# pie-flask
Flask API for Pie

## How to use

You can create the API controller, which wraps a Flask Blueprint, with the following code:

```python
from flask import Flask
from flask_pie import PieController
from pie_extended.models.fro import get_iterator_and_processor
from pie_extended.cli.sub import get_tagger

controller = PieController(
tagger=get_tagger("fro"),
get_iterator_and_processor=get_iterator_and_processor,
path="fro"
)


```
Empty file removed flask_pie/datasets/__init__.py
Empty file.
21 changes: 0 additions & 21 deletions flask_pie/datasets/lasla.py

This file was deleted.

Empty file.
26 changes: 0 additions & 26 deletions flask_pie/disambiguator/autocat.py

This file was deleted.

6 changes: 0 additions & 6 deletions flask_pie/disambiguator/proto.py

This file was deleted.

24 changes: 0 additions & 24 deletions flask_pie/disambiguator/tarte.py

This file was deleted.

166 changes: 28 additions & 138 deletions flask_pie/ext.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,40 @@
from flask import Blueprint, request, Flask, Response, stream_with_context
from werkzeug.exceptions import BadRequest
from typing import Iterator
from pie.tagger import Tagger
from pie.utils import chunks, model_spec

from .testing import FakeTagger
from .utils import DataIterator, Tokenizer, Formatter
from pie_extended.tagger import ExtensibleTagger
from pie_extended.pipeline.postprocessor.proto import ProcessorPrototype
from pie_extended.pipeline.iterators.proto import DataIterator
from pie_extended.pipeline.formatters.proto import Formatter


from typing import Callable, Tuple, Type


class PieController(object):
def __init__(self,
path: str = "/api", name: str = "nlp_pie", iterator: DataIterator = None, device: str = None,
batch_size: int = None, model_file: str = None, formatter_class: Formatter = None,
headers=None, force_lower=False, disambiguation=None, get_iterator_and_formatter=None):
tagger: ExtensibleTagger,
get_iterator_and_processor: Callable[[], Tuple[DataIterator, ProcessorPrototype]],
path: str = "/api",
name: str = "nlp_pie",
batch_size: int = None,
formatter_class: Type[Formatter] = Formatter,
headers=None, force_lower=False):

self._bp: Blueprint = Blueprint(name, import_name=name, url_prefix=path)
self.tokenizer: Tokenizer = None
self.force_lower = force_lower
self.formatter_class = formatter_class or Formatter
self.tagger = tagger
self.get_iterator_and_processor = get_iterator_and_processor
self.batch_size = batch_size
self.model_file = model_file
self.tagger.batch_size = batch_size or 8
self.formatter = formatter_class
self.headers = {
'Content-Type': 'text/plain; charset=utf-8',
'Access-Control-Allow-Origin': "*"
}
if isinstance(headers, dict):
self.headers.update(headers)

if isinstance(model_file, FakeTagger):
self.tagger = model_file
else:
self.tagger = Tagger(device=device, batch_size=batch_size)

for model, tasks in model_spec(model_file):
self.tagger.add_model(model, *tasks)

self.iterator = iterator
if not iterator:
self.iterator = DataIterator()

self._get_iterator_and_formatter = get_iterator_and_formatter
self.disambiguation = disambiguation

def get_iterator_and_formatter(self):
if self._get_iterator_and_formatter:
return self._get_iterator_and_formatter()
return self.iterator, self.formatter_class

def init_app(self, app: Flask):
self._bp.add_url_rule("/", view_func=self.route, endpoint="main", methods=["GET", "POST", "OPTIONS"])
app.register_blueprint(self._bp)
Expand Down Expand Up @@ -74,115 +63,16 @@ def csv_stream(self) -> Iterator[str]:
else:
data = request.form.get("data")

if lower:
data = data.lower()

if not data:
yield ""
raise BadRequest()
else:
iter_fn, formatter = self.get_iterator_and_formatter()
yield from self.build_response(
data,
lower=lower,
iter_fn, proc = self.get_iterator_and_processor()
yield from self.tagger.iter_tag(
data=data,
formatter_class=self.formatter,
iterator=iter_fn,
batch_size=self.batch_size,
tagger=self.tagger,
formatter_class=formatter
)

def reinsert_full(self, formatter, sent_reinsertion, tasks):
yield formatter.write_sentence_beginning()
# If a sentence is empty, it's most likely because everything is in sent_reinsertions
for reinsertion in sorted(list(sent_reinsertion.keys())):
yield formatter.write_line(
formatter.format_line(
token=sent_reinsertion[reinsertion],
tags=[""] * len(tasks)
)
processor=proc
)
yield formatter.write_sentence_end()

def build_response(self, data, iterator, lower, batch_size, tagger, formatter_class):
header = False
formatter = None
for chunk in chunks(iterator(data, lower=lower), size=batch_size):
# Unzip the batch into the sentences, their sizes and the dictionaries of things that needs
# to be reinserted
sents, lengths, needs_reinsertion = zip(*chunk)
# Removing punctuation might create empty sentences !
# Which would crash Torch
empty_sents_indexes = {
index: []
for index, sent in enumerate(sents)
if len(sent) == 0
}
tagged, tasks = tagger.tag(sents=[sent for sent in sents if len(sent)], lengths=lengths)
formatter = formatter_class(tasks)

# We keep a real sentence index
real_sentence_index = 0
for sent in tagged:
if not sent:
continue
# Gets things that needs to be reinserted
sent_reinsertion = needs_reinsertion[real_sentence_index]

# If the header has not yet be written, write it
if not header:
yield formatter.write_headers()
header = True

# Some sentences can be empty and would have been removed from tagging
# we check and until we get to a non empty sentence
# we increment the real_sentence_index to keep in check with the reinsertion map
while real_sentence_index in empty_sents_indexes:
yield from self.reinsert_full(
formatter,
needs_reinsertion[real_sentence_index],
tasks
)
real_sentence_index += 1

yield formatter.write_sentence_beginning()

# If we have a disambiguator, we run the results into it
if self.disambiguation:
sent = self.disambiguation(sent, tasks)

reinsertion_index = 0
index = 0

for index, (token, tags) in enumerate(sent):
while reinsertion_index + index in sent_reinsertion:
yield formatter.write_line(
formatter.format_line(
token=sent_reinsertion[reinsertion_index + index],
tags=[""] * len(tasks)
)
)
del sent_reinsertion[reinsertion_index + index]
reinsertion_index += 1

yield formatter.write_line(
formatter.format_line(token, tags)
)

for reinsertion in sorted(list(sent_reinsertion.keys())):
yield formatter.write_line(
formatter.format_line(
token=sent_reinsertion[reinsertion],
tags=[""] * len(tasks)
)
)

yield formatter.write_sentence_end()

real_sentence_index += 1

while real_sentence_index in empty_sents_indexes:
yield from self.reinsert_full(
formatter,
needs_reinsertion[real_sentence_index],
tasks
)
real_sentence_index += 1

if formatter:
yield formatter.write_footer()
Empty file removed flask_pie/formatters/__init__.py
Empty file.
60 changes: 0 additions & 60 deletions flask_pie/formatters/glue.py

This file was deleted.

0 comments on commit dbb8ef7

Please sign in to comment.