Skip to content

Commit

Permalink
Seems to be a working system
Browse files Browse the repository at this point in the history
  • Loading branch information
PonteIneptique committed Aug 26, 2019
1 parent 146e1dc commit e3e5c58
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 6 deletions.
15 changes: 15 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
language: python
python:
- "3.6"

# Fix: removed a stray lone "-" after the version list — a bare dash is a
# null sequence entry in YAML and would add a bogus `null` Python version.
install:
- pip install -r requirements.txt
- pip install coveralls
- pip install nose

# command to run tests
script:
- nosetests ./tests --with-coverage --cover-package=flask_pie --cover-xml --verbose --nologcapture
after_success:
- coverage combine
- coveralls
2 changes: 1 addition & 1 deletion flask_pie/formatters/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __call__(self, tasks):
self.pos_tag = "pos"
return self

def format_headers(self):
def get_headers(self):
    """ Return the fixed column headers used by the glue output format. """
    headers = GlueFormatter.HEADERS
    return headers

def format_line(self, token, tags, ignored=False):
Expand Down
5 changes: 4 additions & 1 deletion flask_pie/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,12 @@ def write_sentence_end(self) -> str:
def write_footer(self) -> str:
    """ Return the footer emitted after all rows; this format has none. """
    return ""

def get_headers(self):
    """ Column names for the output: the token column, then one per task. """
    return ["token", *self.tasks]

def write_headers(self) -> str:
    """ Render the header row, formatted like any other output line. """
    header_columns = self.get_headers()
    return self.write_line(header_columns)


class MemoryzingTokenizer(object):
Expand Down
9 changes: 9 additions & 0 deletions tests/data/fake1.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
token Case Deg Dis Gend Mood Numb Person Tense Voice lemma pos
latina Nom _ 2 _ _ Plur _ _ _ latina NOMcom
qua _ _ 1 _ _ _ _ _ _ qua ADVint
bella Nom _ _ _ _ Plur _ _ _ bellum NOMcom
sunt _ _ 1 _ Ind Plur 3 Pres Act sum VER

sunt _ _ 1 _ Ind Plur 3 Pres Act sum VER
bella Nom _ _ _ _ Plur _ _ _ bellum NOMcom
sumus _ _ 1 _ Ind Plur 1 Pres Act sum VER
1 change: 1 addition & 0 deletions tests/data/fake1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Latina qua bella sunt , , . Sunt bella , sumus.
13 changes: 13 additions & 0 deletions tests/data/fake1_output.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
form lemma POS morph treated_token
latina LATINA NOMcom Case=Nom|Numb=Plur latina
qua QVA ADVint MORPH=empty qua
bella BELLVM NOMcom Case=Nom|Numb=Plur bella
sunt SVM VER Numb=Plur|Mood=Ind|Tense=Pres|Voice=Act|Person=3 sunt
, , PUNC MORPH=empty ,
, , PUNC MORPH=empty ,
. . PUNC MORPH=empty .
sunt SVM VER Numb=Plur|Mood=Ind|Tense=Pres|Voice=Act|Person=3 sunt
bella BELLVM NOMcom Case=Nom|Numb=Plur bella
, , PUNC MORPH=empty ,
sumus SVM VER Numb=Plur|Mood=Ind|Tense=Pres|Voice=Act|Person=1 sumus
. . PUNC MORPH=empty .
158 changes: 154 additions & 4 deletions tests/test_app.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
from unittest import TestCase
import csv
import io
import os


from flask import Flask

from flask_pie import PieController
from flask_pie.utils import Formatter, DataIterator, Tokenizer
from flask_pie.utils import Formatter, DataIterator, MemoryzingTokenizer
from flask_pie.testing import FakeTagger
from flask_pie.formatters.glue import GlueFormatter


class IntegrationTest(TestCase):
class TestGenericParameters(TestCase):
def create(self, **kwargs):
defaults = dict(
formatter_class=Formatter,
Expand All @@ -30,7 +33,53 @@ def read_tsv(self, response):
reader = csv.reader(io.StringIO(response.data.decode()), delimiter="\t")
return list(iter(reader))

def tagger_response_from_file(self, filepath):
    """ Build FakeTagger keyword arguments from a TSV fixture.

    The first row gives the task names (all columns after the token one);
    blank rows separate sentences; every other row is one
    (token, (tag, ...)) pair.
    """
    full_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filepath)
    task_names = []
    sentences = [[]]
    with open(full_path) as fixture:
        for line_no, cells in enumerate(csv.reader(fixture, delimiter="\t")):
            if line_no == 0:
                # Header row: everything after the "token" column is a task
                task_names = cells[1:]
            elif not cells:
                # Blank line → sentence boundary
                sentences.append([])
            else:
                sentences[-1].append((cells[0], tuple(cells[1:])))
    return {"tasks": task_names, "tokens": sentences}

def test_simple(self):
    """ The plain route should tag the input and render it as TSV. """
    fake = FakeTagger(
        tokens=[[('Lasciva', ('lascivus', )), ('Roma', ('Roma', ))]],
        tasks=["lemma"]
    )
    app = self.create(model_file=fake)

    response = app.get("/api/?data=Lasciva Roma")
    expected = [["token", "lemma"], ["Lasciva", "lascivus"], ["Roma", "Roma"]]
    self.assertEqual(expected, self.read_tsv(response), "TSV should be well generated")
    self.assertEqual(fake.seen, [['Lasciva', 'Roma']])

    # Reset what the tagger saw, then ask for lowering via the query string
    fake.seen = []
    app.get("/api/?data=Lasciva Roma&lower=True")
    self.assertEqual(fake.seen, [['lasciva', 'roma']])

def test_force_lower(self):
""" Test the basic route with simple data but forcing lowering """
tagger = FakeTagger(
tokens=[
[('Lasciva', ('lascivus', )), ('Roma', ('Roma', ))]
Expand All @@ -39,7 +88,8 @@ def test_simple(self):
)

client = self.create(
model_file=tagger
model_file=tagger,
force_lower=True
)
response = client.get("/api/?data=Lasciva Roma")
self.assertEqual(
Expand All @@ -48,7 +98,7 @@ def test_simple(self):
"TSV should be well generated"
)
self.assertEqual(
tagger.seen, [['Lasciva', 'Roma']]
tagger.seen, [['lasciva', 'roma']]
)
tagger.seen = []
# Ask for lowering
Expand All @@ -57,3 +107,103 @@ def test_simple(self):
self.assertEqual(
tagger.seen, [['lasciva', 'roma']]
)

def test_sent_tokenization(self):
    """ Input should be split into sentences before reaching the tagger. """
    fake = FakeTagger(
        tokens=[
            [('Lasciva', ('lascivus', )), ('Roma', ('Roma', ))],
            [('Virgo', ('virgo',)), ('est', ('sum',))]
        ],
        tasks=["lemma"]
    )
    app = self.create(model_file=fake)

    response = app.get("/api/?data=Lasciva Roma. Virgo est.")
    expected = [
        ["token", "lemma"],
        ["Lasciva", "lascivus"],
        ["Roma", "Roma"],
        ["Virgo", "virgo"],
        ["est", "sum"]
    ]
    self.assertEqual(expected, self.read_tsv(response), "TSV should be well generated")
    self.assertEqual(
        [['Lasciva', 'Roma', '.'], ['Virgo', 'est', '.']], fake.seen,
        "Despite faking output, each sentence should be seen completelly"
    )

def test_punct_reinsertion(self):
    """ Punctuation stripped before tagging must reappear in the output. """
    fake = FakeTagger(
        tokens=[
            [('Lasciva', ('lascivus', )), ('Roma', ('Roma', ))],
            [('Virgo', ('virgo',)), ('est', ('sum',))]
        ],
        tasks=["lemma"]
    )
    app = self.create(
        model_file=fake,
        iterator=DataIterator(remove_from_input=DataIterator.remove_punctuation)
    )

    response = app.get("/api/?data=Lasciva Roma. Virgo est.")
    message = "The tagger should not receive any punctuation but it should be reinserted at response time"
    expected_rows = [
        ["token", "lemma"],
        ["Lasciva", "lascivus"],
        ["Roma", "Roma"],
        [".", ""],
        ["Virgo", "virgo"],
        ["est", "sum"],
        [".", ""]
    ]
    self.assertEqual(expected_rows, self.read_tsv(response), message)
    self.assertEqual([['Lasciva', 'Roma'], ['Virgo', 'est']], fake.seen, message)

def test_glue_formatter(self):
    """ Check that glue formatter works okay !

    End-to-end check: the fake tagger's per-task columns are glued into one
    morph field, the original surface token is re-emitted next to the
    normalized one, and punctuation removed before tagging is reinserted.
    """
    # Tokenizer that normalizes "v" -> "u" but remembers original tokens
    tokenizer = MemoryzingTokenizer(replacer=lambda x: x.replace("v", "u"))

    # Canned tagger output loaded from the TSV fixture
    tagger = FakeTagger(**self.tagger_response_from_file("./data/fake1.tsv"))

    client = self.create(
        model_file=tagger,
        headers={"X-Accel-Buffering": "no"},
        formatter_class=GlueFormatter(tokenizer),
        force_lower=True,
        iterator=DataIterator(tokenizer=tokenizer, remove_from_input=DataIterator.remove_punctuation)
    )

    # "Svnt" exercises the v->u replacer; trailing punctuation is stripped
    response = client.post("/api/", data={"data": "Latina qua bella sunt , , . Svnt bella , sumus."})

    self.assertEqual(
        """form lemma POS morph treated_token
latina LATINA NOMcom Case=Nom|Numb=Plur latina
qua QVA ADVint MORPH=empty qua
bella BELLVM NOMcom Case=Nom|Numb=Plur bella
sunt SVM VER Numb=Plur|Mood=Ind|Tense=Pres|Voice=Act|Person=3 sunt
, , PUNC MORPH=empty ,
, , PUNC MORPH=empty ,
. . PUNC MORPH=empty .
svnt SVM VER Numb=Plur|Mood=Ind|Tense=Pres|Voice=Act|Person=3 sunt
bella BELLVM NOMcom Case=Nom|Numb=Plur bella
, , PUNC MORPH=empty ,
sumus SVM VER Numb=Plur|Mood=Ind|Tense=Pres|Voice=Act|Person=1 sumus
. . PUNC MORPH=empty .""",
        response.data.decode().strip().replace("\r", ""),
        "morph should be glued, original token put back in, values should be changed"
    )

    self.assertEqual(
        [["latina", "qua", "bella", "sunt"], ["sunt", "bella", "sumus"]],
        tagger.seen,
        "Punctuation should not be seen, v should be Uified (Second sunt)"
    )

0 comments on commit e3e5c58

Please sign in to comment.