Skip to content

Commit

Permalink
Merge fca29fa into f2fdef0
Browse files Browse the repository at this point in the history
  • Loading branch information
oliverkinch committed Sep 8, 2021
2 parents f2fdef0 + fca29fa commit 9ba26ce
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 135 deletions.
131 changes: 0 additions & 131 deletions .gitignore

This file was deleted.

2 changes: 1 addition & 1 deletion danlp/datasets/dacoref.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def load_as_conllu(self, predefined_splits: bool = False):
or a list of the train, dev and test splits (each a parsed conllu list),
depending on predefined_splits
"""
with open('{}/CDT_coref.conllu'.format(self.dataset_dir)) as f:
with open('{}/CDT_coref.conllu'.format(self.dataset_dir), encoding='utf-8') as f:
conlist = conllu.parse(f.read(), fields=["id", "form", "lemma", "upos", 'xpos', 'feats', 'head', 'deprel','deps', 'misc', 'coref_id', 'coref_rel', 'doc_id', 'qid'])

if predefined_splits==False:
Expand Down
2 changes: 1 addition & 1 deletion danlp/datasets/ddt.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def load_with_spacy(self):
json_path = os.path.join(self.dataset_dir, "ddt.{}.json".format(part))

if not os.path.isfile(json_path): # Convert the conllu files to json
with open(conll_path, 'r') as file:
with open(conll_path, 'r', encoding='utf-8') as file:
file_as_string = file.read()
file_as_string = file_as_string.replace("name=", "").replace("|SpaceAfter=No", "")
file_as_json = conllu2json(file_as_string)
Expand Down
3 changes: 2 additions & 1 deletion danlp/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def extract_single_file_from_zip(cache_dir: str, file_in_zip: str, dest_full_pat
tmp_path = os.path.join(cache_dir, ''.join(random_string()))

outpath = zip_file.extract(file_in_zip, path=tmp_path)
os.rename(outpath, dest_full_path)
if not os.path.exists(dest_full_path):
os.rename(outpath, dest_full_path)

shutil.rmtree(tmp_path)
2 changes: 1 addition & 1 deletion docs/docs/datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ dannet.meanings(word)
# part-of-speech (returns a list whose items are 'Noun', 'Verb' or 'Adjective')
dannet.pos(word)
# wordnet relations (EUROWORDNET or WORDNETOWL)
dannet.wordnet_relations(word, eurowordnet=True))
dannet.wordnet_relations(word, eurowordnet=True)
# word ids
dannet._word_ids(word)
# synset ids
Expand Down

0 comments on commit 9ba26ce

Please sign in to comment.