Permalink
Browse files

Adding type annotations to several files (#760)

* add python-crfsuite

* add type annotations for Latin exceptions file

* ch w2v back

* ch back sent tok

* finish mypy and pylint updates

* add mypy annotations and pylint fixes

* fix tests for refactored phil and contribs modules

* re-added contribs file
  • Loading branch information...
kylepjohnson committed May 7, 2018
1 parent 36ba994 commit 667ba6fd6c317f06b5a48ee3f6229040d1a3d59e
View
@@ -1,3 +1,6 @@
# mypy
.mypy_cache
*.py[cod]
# C extensions
View
@@ -15,7 +15,7 @@
from cltk.utils.file_operations import make_cltk_path
from cltk.utils.file_operations import open_pickle
from cltk.utils.frequency import Frequency
from cltk.utils.philology import Philology
from cltk.utils import philology
__license__ = 'MIT License. See LICENSE.'
@@ -86,7 +86,6 @@ def test_make_list_from_corpus_assert(self):
def test_concordance_from_string(self):
"""Test ``write_concordance_from_string()`` for file writing completion
of concordance builder. Doesn't test quality of output."""
philology = Philology()
text = 'felices cantus ore sonante dedit'
philology.write_concordance_from_string(text, 'test_string')
file = os.path.expanduser('~/cltk_data/user_data/concordance_test_string.txt')
@@ -96,30 +95,28 @@ def test_concordance_from_string(self):
def test_concordance_from_file(self):
    """Test that ``write_concordance_from_file()`` writes its output file.

    Only checks that the concordance file exists afterwards; doesn't test
    quality of output.
    """
    text_file = 'cltk/tests/text-file.txt'
    file_conc = os.path.expanduser('~/cltk_data/user_data/concordance_test_file.txt')
    # Remove any leftover from an earlier run so the assertion below proves
    # that *this* call produced the file.
    try:
        os.remove(file_conc)
    except FileNotFoundError:
        pass
    philology.write_concordance_from_file(text_file, 'test_file')
    self.assertTrue(os.path.isfile(file_conc))
def test_concordance_from_file_ioerror(self):
    """Test that ``write_concordance_from_file()`` raises ``IOError`` when
    the input file does not exist."""
    non_existent_file = '/not-there.txt'
    with self.assertRaises(IOError):
        philology.write_concordance_from_file(non_existent_file, 'test_file')
def test_contribs_find_write_contribs(self):
    """Test that ``find_write_contribs()`` (re)creates ``contributors.md``
    in the current working directory."""
    file_contribs = 'contributors.md'
    # Delete any previous copy so the existence check below can only pass
    # if this run wrote the file.
    try:
        os.remove(file_contribs)
    except FileNotFoundError:
        logger.info("No file to remove at '%s'. Continuing.", file_contribs)
    find_write_contribs()
    self.assertTrue(os.path.isfile(file_contribs))
def test_get_authors(self):
@@ -132,7 +129,6 @@ def test_scantree(self):
a_generator = scantree('cltk')
self.assertEqual(str(type(a_generator)), "<class 'generator'>")
def test_write_contribs(self):
"""Test file writer for contribs module."""
# rm old
View
@@ -0,0 +1,4 @@
All that we are is the result of what we have thought: it is
founded on our thoughts, it is made up of our thoughts. If a man
speaks or acts with an evil thought, pain follows him, as the wheel
follows the foot of the ox that draws the carriage.
@@ -5,12 +5,14 @@
comprehensive. Additions to the exceptions list are welcome. PJB
"""
from typing import List

# One list of exception word forms per ending, named after that ending.
# ``que_exceptions`` is extended with ``+=`` further down; presumably the
# other lists are filled the same way -- confirm against the rest of the file.
que_exceptions = []  # type: List[str]
n_exceptions = []  # type: List[str]
ne_exceptions = []  # type: List[str]
ue_exceptions = []  # type: List[str]
ve_exceptions = []  # type: List[str]
st_exceptions = []  # type: List[str]
# quisque
que_exceptions += ['quisque', 'quidque', 'quicque', 'quodque', 'cuiusque', 'cuique', 'quemque', 'quoque', 'quique', 'quaeque', 'quorumque', 'quarumque', 'quibusque', 'quosque', 'quasque']
View
@@ -6,38 +6,44 @@
import os
import re
from typing import Dict
from typing import List
from typing import Generator
from typing import Pattern # pylint: disable=unused-import
from typing import Union # pylint: disable=unused-import
from typing import Tuple # pylint: disable=unused-import
from typing import IO # pylint: disable=unused-import
from cltk.utils.cltk_logger import logger
__author__ = ['Kyle P. Johnson <kyle@kyle-p-johnson.com>']
__license__ = 'MIT License. See LICENSE.'
__author__ = ['Kyle P. Johnson <kyle@kyle-p-johnson.com>'] # type: List[str]
__license__ = 'MIT License. See LICENSE.' # type: str
def eval_str_to_list(input_str: str) -> List[str]:
    """Parse the source text of a Python list literal into a list.

    :param input_str: Text of a list literal, e.g. ``"['A', 'B']"``.
    :return: The list produced by ``ast.literal_eval``. Element types are
        not checked here; callers pass lists of author strings.
    :raises ValueError: If the text is a valid literal but not a list.
        ``ast.literal_eval`` itself raises ``ValueError`` or ``SyntaxError``
        for text that is not a valid literal.
    """
    # literal_eval only accepts literals, so arbitrary code in the scanned
    # text is never executed.
    inner_cast = ast.literal_eval(input_str)
    if not isinstance(inner_cast, list):
        raise ValueError(
            'Expected a list literal, got {}: {!r}'.format(
                type(inner_cast).__name__, input_str))
    return inner_cast
def get_authors(filepath: str) -> List[str]:
    """Read a file and return the authors named in its ``__author__`` list.

    Only the first line-initial ``__author__ = [...]`` assignment is used,
    and the list must open and close on the same line: the pattern has no
    ``DOTALL`` flag and stops at the first ``]``.

    :param filepath: Path of the file to inspect; opened in text mode.
    :return: The parsed author list, or an empty list when the file has no
        matching assignment.
    :raises ValueError: Propagated from ``eval_str_to_list`` when the
        bracketed text is not a list literal.
    """
    str_oneline = r'(^__author__ = )(\[.*?\])'  # type: str
    comp_oneline = re.compile(str_oneline, re.MULTILINE)  # type: Pattern[str]
    with open(filepath) as file_open:
        file_read = file_open.read()  # type: str
    # Only the first occurrence matters, so stop searching once it is found.
    match = comp_oneline.search(file_read)
    if match is None:
        return []
    inner_list_as_str = match.group(2)  # type: str
    return eval_str_to_list(inner_list_as_str)
def scantree(path):
def scantree(path: str) -> Generator:
"""Recursively yield DirEntry objects for given directory."""
for entry in os.scandir(path):
if entry.is_dir(follow_symlinks=False):
@@ -47,47 +53,48 @@ def scantree(path):
yield entry
def write_contribs(def_dict_list: Dict[str, List[str]]) -> None:
    """Write 'contributors.md' to the current working directory.

    The file starts with a fixed heading, followed by one ``## <author>``
    section per key with one ``* <module>`` bullet per entry of that key's
    list. Sections and bullets follow the iteration order of the arguments,
    so callers wanting sorted output must pass sorted data.

    :param def_dict_list: Mapping of author name to the list of file paths
        attributed to that author.
    """
    parts = [
        '# Contributors\n'
        'CLTK Core authors, ordered alphabetically by first name\n\n'
    ]  # type: List[str]
    for contrib, modules in def_dict_list.items():
        parts.append('## ' + contrib + '\n')
        parts.extend('* ' + module + '\n' for module in modules)
        parts.append('\n')
    file_name = 'contributors.md'  # type: str
    # Author names may contain non-ASCII characters; fix the encoding rather
    # than depend on the platform default.
    with open(file_name, 'w', encoding='utf-8') as file_open:
        file_open.write(''.join(parts))
    logger.info('Wrote contribs file at "%s".', file_name)
def sort_def_dict(def_dict: Dict[str, List[str]]) -> Dict[str, List[str]]:
    """Sort every list value of a mapping in place.

    :param def_dict: Mapping whose values are lists, e.g. a
        ``defaultdict(list)``.
    :return: The same mapping object that was passed in, with each list
        sorted; the order of the keys is left untouched.
    """
    for dd_list in def_dict.values():
        dd_list.sort()
    return def_dict
def find_write_contribs() -> None:
    """Collect authors from files under 'cltk' and write 'contributors.md'.

    Walks the ``cltk`` directory (relative to the current working
    directory) with ``scantree``, reads each entry's ``__author__`` list via
    ``get_authors``, groups file paths by author, sorts both the authors and
    each author's paths, and passes the result to ``write_contribs``.
    """
    map_auth_file = defaultdict(list)  # type: Dict[str, List[str]]
    for entry in scantree('cltk'):
        filepath = entry.path  # type: str
        # Files without author info yield an empty list and add nothing.
        for author in get_authors(filepath):
            map_auth_file[author].append(filepath)

    # Sort each author's file list, then order the authors themselves.
    sorted_by_author = sorted(sort_def_dict(map_auth_file).items())  # type: List[Tuple[str, List[str]]]
    write_contribs(OrderedDict(sorted_by_author))
if __name__ == "__main__":
Oops, something went wrong.

0 comments on commit 667ba6f

Please sign in to comment.