Skip to content

Commit

Permalink
Fixed typing
Browse files (browse the repository at this point in the history)
  • Loading branch information
oktaal committed Apr 3, 2024
1 parent 8d9db05 commit e52c301
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 61 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[![Actions Status](https://github.com/UUDigitalHumanitiesLab/corpus2alpino/workflows/Unit%20tests/badge.svg)](https://github.com/UUDigitalHumanitiesLab/corpus2alpino/actions)

[PyPi/corpus2alpino](https://pypi.org/project/corpus2alpino/)
[![PyPi/corpus2alpino](https://img.shields.io/pypi/v/corpus2alpino)](https://pypi.org/project/corpus2alpino/)

# CHAT, FoLiA, PaQu metadata, plaintext and TEI to Alpino XML or PaQu metadata format

Expand Down
19 changes: 9 additions & 10 deletions corpus2alpino/converter.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
#!/usr/bin/env python3
from typing import List
from typing import List, Optional

from corpus2alpino.collectors.filesystem import FilesystemCollector
from corpus2alpino.readers.auto import AutoReader
from corpus2alpino.targets.console import ConsoleTarget
from corpus2alpino.targets.filesystem import FilesystemTarget
from corpus2alpino.writers.lassy import LassyWriter
from corpus2alpino.writers.paqu import PaQuWriter

from corpus2alpino.abstracts import Annotator, Collector, Reader, Target, Writer
Expand All @@ -16,12 +13,14 @@ class Converter:
Class for converting files to Alpino XML (input) files.
"""

def __init__(self,
collector: Collector,
annotators: List[Annotator] = None,
reader: Reader = AutoReader(),
writer: Writer = PaQuWriter(),
target: Target = ConsoleTarget()) -> None:
def __init__(
self,
collector: Collector,
annotators: Optional[List[Annotator]] = None,
reader: Reader = AutoReader(),
writer: Writer = PaQuWriter(),
target: Target = ConsoleTarget(),
) -> None:
self.collector = collector
self.annotators = annotators or []
self.reader = reader
Expand Down
39 changes: 22 additions & 17 deletions corpus2alpino/models.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,32 @@
#!/usr/bin/env python3
from typing import Dict, List, Iterable
from typing import Dict, Iterable, Optional


class CollectedFile:
def __init__(self, relpath: str, filename: str, mimetype: str,
content: str) -> None:
def __init__(
self, relpath: str, filename: str, mimetype: str, content: str
) -> None:
self.relpath = relpath
self.filename = filename
self.mimetype = mimetype
self.content = content


class MetadataValue:
def __init__(self, value: str, type: str='text') -> None:
def __init__(self, value: str, type: str = "text") -> None:
self.value = value
self.type = type


class Utterance:
def __init__(self,
text: str,
id: str,
metadata: Dict[str, MetadataValue] = None,
line: int = 0,
annotations: Dict[str, str] = None) -> None:
def __init__(
self,
text: str,
id: str,
metadata: Optional[Dict[str, MetadataValue]] = None,
line: int = 0,
annotations: Optional[Dict[str, str]] = None,
) -> None:
self.text = text
self.id = id
self.metadata = metadata or {}
Expand All @@ -32,19 +35,21 @@ def __init__(self,


class Document:
def __init__(self,
collected_file: CollectedFile,
utterances: Iterable[Utterance],
metadata: Dict[str, MetadataValue] = None,
subpath: str = '',
annotations: Dict[str, str] = None) -> None:
def __init__(
self,
collected_file: CollectedFile,
utterances: Iterable[Utterance],
metadata: Optional[Dict[str, MetadataValue]] = None,
subpath: str = "",
annotations: Optional[Dict[str, str]] = None,
) -> None:
"""
A document found in a file.
subpath: if a file has an internal structure, this
contains a string representation of that relative to
the file. E.g. if a tei.xml contains a document A at the
root and a document B
root and a document B
"""
self.collected_file = collected_file
Expand Down
17 changes: 9 additions & 8 deletions corpus2alpino/targets/console.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from os import path
from pathlib import Path
from typing import Optional

from corpus2alpino.abstracts import Target
from corpus2alpino.models import Document
Expand All @@ -10,15 +9,17 @@ class ConsoleTarget(Target):
Output chunks to the console on separate lines.
"""

def write(self,
document: Document,
content: str,
filename: str = None,
suffix: str = None):
def write(
self,
document: Document,
content: str,
filename: Optional[str] = None,
suffix: Optional[str] = None,
):
"""
Write all lines to stdout.
"""
print(content, end='')
print(content, end="")

def flush(self):
return
Expand Down
35 changes: 19 additions & 16 deletions corpus2alpino/targets/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from os import path, makedirs
from pathlib import Path
from typing import cast, Any
from typing import Optional, cast


class FilesystemTarget(Target):
Expand All @@ -14,23 +14,24 @@ class FilesystemTarget(Target):

__current_output_path = None

def __open_file(self, document: Document, filename: str = None, suffix: str = None):
def __open_file(self, document: Document, filename: Optional[str] = None, suffix: Optional[str] = None):
if self.merge_files:
# when merge_files = True, a file is already open
return

output_path = path.join(self.output_path,
document.collected_file.relpath,
document.collected_file.filename)
return

output_path = path.join(
self.output_path,
document.collected_file.relpath,
document.collected_file.filename,
)

if document.subpath:
output_path = path.join(output_path, document.subpath)

if filename != None:
output_path = path.join(output_path, cast(str, filename))
if suffix != None:
output_path = str(
Path(output_path).with_suffix(cast(str, suffix)))
output_path = str(Path(output_path).with_suffix(cast(str, suffix)))

# always open a new file when splitting in separate files
self.__current_output_path = None
Expand All @@ -53,7 +54,7 @@ def __open_unique(self, directory: str, filename: str):
target = Path(path.join(directory, prefix + filename))
if not target.is_file():
# new file!
return target.open('w', encoding='utf-8')
return target.open("w", encoding="utf-8")
attempts += 1

def __init__(self, output_path: str, merge_files=False) -> None:
Expand All @@ -63,15 +64,17 @@ def __init__(self, output_path: str, merge_files=False) -> None:
if self.merge_files:
# using a single file
makedirs(path.dirname(output_path), exist_ok=True)
self.file = open(output_path, 'w', encoding='utf-8')
self.file = open(output_path, "w", encoding="utf-8")
else:
self.file = None # type: ignore

def write(self,
document: Document,
content: str,
filename: str = None,
suffix: str = None):
def write(
self,
document: Document,
content: str,
filename: Optional[str] = None,
suffix: Optional[str] = None,
):
self.__open_file(document, filename, suffix)
if self.file:
self.file.write(content)
Expand Down
20 changes: 11 additions & 9 deletions corpus2alpino/targets/memory.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from os import path
from pathlib import Path
from typing import Optional

from corpus2alpino.abstracts import Target
from corpus2alpino.models import Document
Expand All @@ -9,13 +8,16 @@ class MemoryTarget(Target):
"""
Combine output in memory.
"""
buffer = ''

def write(self,
document: Document,
content: str,
filename: str = None,
suffix: str = None):
buffer = ""

def write(
self,
document: Document,
content: str,
filename: Optional[str] = None,
suffix: Optional[str] = None,
):
"""
Add the content to the in-memory buffer.
"""
Expand All @@ -25,7 +27,7 @@ def flush(self):
try:
return self.buffer
finally:
self.buffer = ''
self.buffer = ""

def close(self):
return

0 comments on commit e52c301

Please sign in to comment.