Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Enhancement] Fix minor code issues #1717

Merged
merged 2 commits into from
Mar 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 5 additions & 3 deletions backend/data_export/pipeline/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,13 @@ class Options:
@classmethod
def filter_by_task(cls, task_name: str):
options = cls.options[task_name]
return [{**format.dict(), **option.schema(), "example": example} for format, option, example in options]
return [
{**file_format.dict(), **option.schema(), "example": example} for file_format, option, example in options
]

@classmethod
def register(cls, task: str, format: Type[Format], option: Type[BaseModel], example: str):
cls.options[task].append((format, option, example))
def register(cls, task: str, file_format: Type[Format], option: Type[BaseModel], example: str):
cls.options[task].append((file_format, option, example))


# Text Classification
Expand Down
4 changes: 2 additions & 2 deletions backend/data_export/pipeline/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

class Record:
def __init__(
self, id: int, data: str, label: Union[List[Any], Dict[Any, Any]], user: str, metadata: Dict[Any, Any]
self, data_id: int, data: str, label: Union[List[Any], Dict[Any, Any]], user: str, metadata: Dict[Any, Any]
):
self.id = id
self.id = data_id
self.data = data
self.label = label
self.user = user
Expand Down
8 changes: 4 additions & 4 deletions backend/data_export/pipeline/repositories.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def list(self, export_approved=False) -> Iterator[Record]:
label_per_user = self.reduce_user(label_per_user)
for user, label in label_per_user.items():
yield Record(
id=example.id,
data_id=example.id,
data=str(example.filename).split("/")[-1],
label=label,
user=user,
Expand All @@ -45,7 +45,7 @@ def list(self, export_approved=False) -> Iterator[Record]:
# This means I will allow each user to be able to approve the doc.
if len(label_per_user) == 0:
yield Record(
id=example.id, data=str(example.filename).split("/")[-1], label=[], user="unknown", metadata={}
data_id=example.id, data=str(example.filename).split("/")[-1], label=[], user="unknown", metadata={}
)

def label_per_user(self, example) -> Dict:
Expand Down Expand Up @@ -82,15 +82,15 @@ def list(self, export_approved=False):
if self.project.collaborative_annotation:
label_per_user = self.reduce_user(label_per_user)
for user, label in label_per_user.items():
yield Record(id=doc.id, data=doc.text, label=label, user=user, metadata=doc.meta)
yield Record(data_id=doc.id, data=doc.text, label=label, user=user, metadata=doc.meta)
# todo:
# If there is no label, export the doc with `unknown` user.
# This is a quick solution.
# In the future, the doc without label will be exported
# with the user who approved the doc.
# This means I will allow each user to be able to approve the doc.
if len(label_per_user) == 0:
yield Record(id=doc.id, data=doc.text, label=[], user="unknown", metadata={})
yield Record(data_id=doc.id, data=doc.text, label=[], user="unknown", metadata={})

@abc.abstractmethod
def label_per_user(self, doc) -> Dict:
Expand Down
16 changes: 10 additions & 6 deletions backend/data_export/tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
class TestCSVWriter(unittest.TestCase):
def setUp(self):
self.records = [
Record(id=0, data="exampleA", label=["labelA"], user="admin", metadata={"hidden": "secretA"}),
Record(id=1, data="exampleB", label=["labelB"], user="admin", metadata={"hidden": "secretB"}),
Record(id=2, data="exampleC", label=["labelC"], user="admin", metadata={"meta": "secretC"}),
Record(data_id=0, data="exampleA", label=["labelA"], user="admin", metadata={"hidden": "secretA"}),
Record(data_id=1, data="exampleB", label=["labelB"], user="admin", metadata={"hidden": "secretB"}),
Record(data_id=2, data="exampleC", label=["labelC"], user="admin", metadata={"meta": "secretC"}),
]

def test_create_header(self):
Expand All @@ -29,8 +29,8 @@ def test_create_line(self):

def test_label_order(self):
writer = CsvWriter(".")
record1 = Record(id=0, data="", label=["labelA", "labelB"], user="", metadata={})
record2 = Record(id=0, data="", label=["labelB", "labelA"], user="", metadata={})
record1 = Record(data_id=0, data="", label=["labelA", "labelB"], user="", metadata={})
record2 = Record(data_id=0, data="", label=["labelB", "labelA"], user="", metadata={})
line1 = writer.create_line(record1)
line2 = writer.create_line(record2)
expected = "labelA#labelB"
Expand Down Expand Up @@ -61,7 +61,11 @@ def test_dump(self, mock_open_file, csv_io, zip_io, mock_remove_file):
class TestIntentWriter(unittest.TestCase):
def setUp(self):
self.record = Record(
id=0, data="exampleA", label={"cats": ["positive"], "entities": [(0, 1, "LOC")]}, user="admin", metadata={}
data_id=0,
data="exampleA",
label={"cats": ["positive"], "entities": [(0, 1, "LOC")]},
user="admin",
metadata={},
)

def test_create_line(self):
Expand Down
6 changes: 3 additions & 3 deletions backend/data_import/pipeline/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def parse(self, filename: str) -> Iterator[Dict[Any, Any]]:
encoding = decide_encoding(filename, self.encoding)
with open(filename, encoding=encoding) as f:
reader = csv.DictReader(f, delimiter=self.delimiter)
for line_num, row in enumerate(reader, start=2):
for row in reader:
yield row


Expand All @@ -171,7 +171,7 @@ def parse(self, filename: str) -> Iterator[Dict[Any, Any]]:
with open(filename, encoding=encoding) as f:
try:
rows = json.load(f)
for line_num, row in enumerate(rows, start=1):
for row in rows:
yield row
except json.decoder.JSONDecodeError as e:
error = FileParseException(filename, line_num=1, message=str(e))
Expand Down Expand Up @@ -246,7 +246,7 @@ def __init__(self, encoding: str = DEFAULT_ENCODING, label: str = "__label__", *

def parse(self, filename: str) -> Iterator[Dict[Any, Any]]:
reader = LineReader(filename, self.encoding)
for line_num, line in enumerate(reader, start=1):
for line in reader:
labels = []
tokens = []
for token in line.rstrip().split(" "):
Expand Down
2 changes: 1 addition & 1 deletion backend/label_types/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,6 @@ def test_can_add_label_suffix_key_with_prefix_key(self):

class TestGeneratedColor(TestCase):
def test_length(self):
for i in range(100):
for _ in range(100):
color = generate_random_hex_color()
self.assertEqual(len(color), 7)
2 changes: 1 addition & 1 deletion backend/labels/migrations/0008_auto_20220222_0630.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def delete_new_relation(apps, schema_editor):
RelationOld = apps.get_model("labels", "RelationOld")
RelationTypeOld = apps.get_model("label_types", "RelationTypeOld")
for relation in RelationNew.objects.all():
relation_type, created = RelationTypeOld.objects.get_or_create(
relation_type, _ = RelationTypeOld.objects.get_or_create(
project=relation.type.project, name=relation.type.text, color=relation.type.background_color
)
RelationOld(
Expand Down