Merge branches 'dev' and 'docs/readthedocs_integration'
# Conflicts:
#	deeppavlov/models/ner/README.md
#	deeppavlov/models/slotfill/README.md
#	deeppavlov/models/spelling_correction/README.md
#   resolved
nikolay-bushkov committed Jul 24, 2018
2 parents 5e6633a + 405fa5b commit 0e9904d
Showing 48 changed files with 105 additions and 96 deletions.
3 changes: 2 additions & 1 deletion deeppavlov/configs/go_bot/gobot_dstc2.json
@@ -86,7 +86,8 @@
"requirements": [
"../dp_requirements/tf.txt",
"../dp_requirements/fasttext.txt",
"../dp_requirements/spacy.txt"
"../dp_requirements/spacy.txt",
"../dp_requirements/en_core_web_sm.txt"
],
"labels": {
"telegram_utils": "GoalOrientedBot",
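Note: the same two-line requirements change recurs in the seven other configs below. `en_core_web_sm.txt` presumably pins the download of spaCy's English model, so that installing `spacy.txt` alone no longer leaves the model missing at runtime. A minimal sanity-check sketch — the model name `en_core_web_sm` is assumed from the requirements file name:

```python
# Hedged sketch: confirm the spaCy English model installed by the new
# requirements file is importable. spacy.load raises OSError if it is absent.
import spacy

nlp = spacy.load('en_core_web_sm')
print([t.text for t in nlp("DeepPavlov's go-bot tokenizes turns with spaCy.")])
```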
3 changes: 2 additions & 1 deletion deeppavlov/configs/go_bot/gobot_dstc2_all.json
@@ -91,7 +91,8 @@
"requirements": [
"../dp_requirements/tf.txt",
"../dp_requirements/fasttext.txt",
"../dp_requirements/spacy.txt"
"../dp_requirements/spacy.txt",
"../dp_requirements/en_core_web_sm.txt"
],
"labels": {
"telegram_utils": "GoalOrientedBot",
3 changes: 2 additions & 1 deletion deeppavlov/configs/go_bot/gobot_dstc2_best.json
@@ -98,7 +98,8 @@
"requirements": [
"../dp_requirements/tf.txt",
"../dp_requirements/fasttext.txt",
"../dp_requirements/spacy.txt"
"../dp_requirements/spacy.txt",
"../dp_requirements/en_core_web_sm.txt"
],
"labels": {
"telegram_utils": "GoalOrientedBot",
3 changes: 2 additions & 1 deletion deeppavlov/configs/go_bot/gobot_dstc2_minimal.json
@@ -70,7 +70,8 @@
"requirements": [
"../dp_requirements/tf.txt",
"../dp_requirements/fasttext.txt",
"../dp_requirements/spacy.txt"
"../dp_requirements/spacy.txt",
"../dp_requirements/en_core_web_sm.txt"
],
"epochs": 200,
"batch_size": 4,
3 changes: 2 additions & 1 deletion deeppavlov/configs/odqa/en_odqa_infer_wiki.json
@@ -48,7 +48,8 @@
"metadata": {
"requirements": [
"../dp_requirements/tf-gpu.txt",
"../dp_requirements/spacy.txt"
"../dp_requirements/spacy.txt",
"../dp_requirements/en_core_web_sm.txt"
],
"labels": {
"server_utils": "ODQA"
3 changes: 2 additions & 1 deletion deeppavlov/configs/ranking/en_ranker_tfidf_wiki.json
@@ -52,7 +52,8 @@
},
"metadata": {
"requirements": [
"../dp_requirements/spacy.txt"
"../dp_requirements/spacy.txt",
"../dp_requirements/en_core_web_sm.txt"
],
"labels": {
"server_utils": "Ranker"
3 changes: 2 additions & 1 deletion deeppavlov/configs/seq2seq_go_bot/bot_kvret.json
@@ -109,7 +109,8 @@
"metadata": {
"requirements": [
"../dp_requirements/tf.txt",
"../dp_requirements/spacy.txt"
"../dp_requirements/spacy.txt",
"../dp_requirements/en_core_web_sm.txt"
],
"labels": {
"telegram_utils": "Seq2SeqGoalOrientedBot",
3 changes: 2 additions & 1 deletion deeppavlov/configs/seq2seq_go_bot/bot_kvret_infer.json
@@ -84,7 +84,8 @@
"metadata": {
"requirements": [
"../dp_requirements/tf.txt",
"../dp_requirements/spacy.txt"
"../dp_requirements/spacy.txt",
"../dp_requirements/en_core_web_sm.txt"
],
"labels": {
"telegram_utils": "Seq2SeqGoalOrientedBot",
2 changes: 1 addition & 1 deletion deeppavlov/core/commands/infer.py
@@ -114,7 +114,7 @@ def predict_on_stream(config_path, batch_size=1, file_path=None):
raise RuntimeError('To process data from terminal please use interact mode')
f = sys.stdin
else:
-f = open(file_path)
+f = open(file_path, encoding='utf8')

config = read_json(config_path)
model: Chainer = build_model_from_config(config)
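Note: this commit's recurring fix — an explicit `encoding='utf8'` on every file open — matters because Python 3's `open()` otherwise falls back to `locale.getpreferredencoding()`, which is often cp1252 on Windows and then fails on the UTF-8 data files DeepPavlov ships. A self-contained illustration (`demo.txt` is a scratch file, not part of the repository):

```python
# Why the explicit encoding: the default is platform-dependent, so non-ASCII
# text only round-trips reliably when utf8 is pinned.
import locale

print(locale.getpreferredencoding(False))  # e.g. 'cp1252' on many Windows setups

with open('demo.txt', 'w', encoding='utf8') as f:
    f.write('привет, мир')  # Cyrillic survives only with a known encoding

with open('demo.txt', encoding='utf8') as f:
    assert f.read() == 'привет, мир'
```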
4 changes: 2 additions & 2 deletions deeppavlov/core/common/file.py
@@ -19,12 +19,12 @@


def read_json(fpath):
-with open(fpath) as fin:
+with open(fpath, encoding='utf8') as fin:
return json.load(fin)


def save_json(data, fpath):
-with open(fpath, 'w') as fout:
+with open(fpath, 'w', encoding='utf8') as fout:
return json.dump(data, fout, ensure_ascii=False, indent=2)


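Note: `save_json` dumps with `ensure_ascii=False`, so the output contains raw non-ASCII characters — which is exactly why the writer must also be opened as UTF-8. A round-trip sketch of the same pattern (file name invented):

```python
# Round trip mirroring read_json/save_json: ensure_ascii=False keeps Cyrillic
# literal in the file, which requires the explicit utf8 on the write side.
import json

data = {'text': 'привет'}
with open('demo.json', 'w', encoding='utf8') as fout:
    json.dump(data, fout, ensure_ascii=False, indent=2)
with open('demo.json', encoding='utf8') as fin:
    assert json.load(fin) == data
```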
2 changes: 1 addition & 1 deletion deeppavlov/core/common/log.py
@@ -29,7 +29,7 @@ def get_logger(logger_name):
config_dir = Path(__file__).resolve().parent
log_config_path = Path(config_dir, '..', '..', LOG_CONFIG_FILENAME).resolve()

-with open(log_config_path) as log_config_json:
+with open(log_config_path, encoding='utf8') as log_config_json:
log_config = json.load(log_config_json)

configured_loggers = [log_config.get('root', {})] + log_config.get('loggers', [])
4 changes: 2 additions & 2 deletions deeppavlov/core/data/simple_vocab.py
@@ -81,7 +81,7 @@ def __call__(self, batch, **kwargs):

def save(self):
log.info("[saving vocabulary to {}]".format(self.save_path))
-with self.save_path.open('wt') as f:
+with self.save_path.open('wt', encoding='utf8') as f:
for n in range(len(self)):
token = self._i2t[n]
cnt = self.freqs[token]
@@ -93,7 +93,7 @@ def load(self):
if self.load_path.is_file():
log.info("[loading vocabulary from {}]".format(self.load_path))
tokens, counts = [], []
-for ln in self.load_path.open('r'):
+for ln in self.load_path.open('r', encoding='utf8'):
token, cnt = ln.split('\t', 1)
tokens.append(token)
counts.append(int(cnt))
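Note: as the `load()` loop implies, the vocabulary file holds one `token<TAB>count` pair per line. A standalone sketch of that format (path and contents invented):

```python
# Hypothetical vocab file in the 'token<TAB>count' format that the
# save()/load() pair above uses.
from pathlib import Path

path = Path('vocab_demo.txt')
path.write_text('hello\t3\nworld\t1\n', encoding='utf8')

tokens, counts = [], []
for ln in path.open('r', encoding='utf8'):
    token, cnt = ln.split('\t', 1)
    tokens.append(token)
    counts.append(int(cnt))
print(dict(zip(tokens, counts)))  # {'hello': 3, 'world': 1}
```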
2 changes: 1 addition & 1 deletion deeppavlov/core/data/utils.py
@@ -235,7 +235,7 @@ def copytree(src: Path, dest: Path):

def load_vocab(vocab_path):
vocab_path = Path(vocab_path)
-with vocab_path.open() as f:
+with vocab_path.open(encoding='utf8') as f:
return f.read().split()


2 changes: 1 addition & 1 deletion deeppavlov/dataset_iterators/dstc2_ner_iterator.py
@@ -42,7 +42,7 @@ def __init__(self, data, dataset_path, seed=None, shuffle=False):
# TODO: include slot vals to dstc2.tar.gz
dataset_path = expand_path(dataset_path) / 'slot_vals.json'
self._build_slot_vals(dataset_path)
-with open(dataset_path) as f:
+with open(dataset_path, encoding='utf8') as f:
self._slot_vals = json.load(f)
for data_type in ['train', 'test', 'valid']:
bio_markup_data = self._preprocess(data.get(data_type, []))
6 changes: 3 additions & 3 deletions deeppavlov/dataset_readers/babi_reader.py
@@ -44,7 +44,7 @@ def read(self, file_path):
responses = self._get_responses(file_path, dialogs)

responses_path = Path(paths.deeppavlov_root) / 'responses.txt'
-responses_path.write_text('\n'.join(responses))
+responses_path.write_text('\n'.join(responses), encoding='utf8')

trainset = [{'context': u, 'response': r} for u, r in zip(utterances, responses)]

@@ -72,7 +72,7 @@ def filter_(dialogs):
filtered_.append(row)
return filtered_

-with open(file_path) as f:
+with open(file_path, encoding='utf8') as f:
dialogs = filter_([rm_index(row.split('\t')) for row in f.read().split('\n')])
# organize dialogs -> dialog_indices
prev_idx = -1
@@ -110,7 +110,7 @@ def _get_responses(self, file_path, dialogs=None):
#TODO: move save_vocab to babi_dataset
@staticmethod
def save_vocab(dialogs, fpath):
-with open(fpath, 'w') as f:
+with open(fpath, 'w', encoding='utf8') as f:
words = sorted(list(set(chain.from_iterable(
[instance['context'].split() for dialog in dialogs for instance in dialog]))))
f.write(' '.join(words))
2 changes: 1 addition & 1 deletion deeppavlov/dataset_readers/conll2003_reader.py
@@ -30,7 +30,7 @@ def read(self, dir_path: str, dataset_name='conll2003', provide_pos=False):

def parse_ner_file(self, file_name: Path):
samples = []
-with file_name.open() as f:
+with file_name.open(encoding='utf8') as f:
tokens = ['<DOCSTART>']
pos_tags = ['O']
tags = ['O']
4 changes: 2 additions & 2 deletions deeppavlov/dataset_readers/dstc2_reader.py
@@ -108,7 +108,7 @@ def _format_turn(turn):

@staticmethod
def _iter_file(file_path):
-for ln in open(file_path, 'rt'):
+for ln in open(file_path, 'rt', encoding='utf8'):
if ln.strip():
yield json.loads(ln)
else:
@@ -236,7 +236,7 @@ def _format_turn(turn):

@staticmethod
def _iter_file(file_path):
-for ln in open(file_path, 'rt'):
+for ln in open(file_path, 'rt', encoding='utf8'):
if ln.strip():
yield json.loads(ln)
else:
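Note: `_iter_file` reads the DSTC2 logs as JSON lines, with blank lines separating dialogs. The same pattern in miniature (sample data and field names invented):

```python
# JSON-lines iteration in the style of _iter_file above.
import json
from io import StringIO

raw = '{"text": "hi"}\n{"text": "hello"}\n\n{"text": "bye"}\n'
for ln in StringIO(raw):
    if ln.strip():
        print(json.loads(ln)['text'])
    # a blank line would mark a dialog boundary
```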
10 changes: 5 additions & 5 deletions deeppavlov/dataset_readers/insurance_reader.py
@@ -30,19 +30,19 @@ def download_data(self, data_path):

def _build_context2toks_vocabulary(self, train_f, val_f, test_f):
contexts = []
-with open(train_f, 'r') as f:
+with open(train_f, 'r', encoding='utf8') as f:
data = f.readlines()
for eli in data:
eli = eli[:-1]
c, _ = eli.split('\t')
contexts.append(c)
-with open(val_f, 'r') as f:
+with open(val_f, 'r', encoding='utf8') as f:
data = f.readlines()
for eli in data:
eli = eli[:-1]
_, c, _ = eli.split('\t')
contexts.append(c)
-with open(test_f, 'r') as f:
+with open(test_f, 'r', encoding='utf8') as f:
data = f.readlines()
for eli in data:
eli = eli[:-1]
@@ -55,7 +55,7 @@ def preprocess_data_train(self, fname):
positive_responses_pool = []
contexts = []
responses = []
-with open(fname, 'r') as f:
+with open(fname, 'r', encoding='utf8') as f:
data = f.readlines()
for eli in data:
eli = eli[:-1]
@@ -75,7 +75,7 @@ def preprocess_data_valid_test(self, fname):
neg_responses_pool = []
contexts = []
pos_responses = []
-with open(fname, 'r') as f:
+with open(fname, 'r', encoding='utf8') as f:
data = f.readlines()
for eli in data:
eli = eli[:-1]
4 changes: 3 additions & 1 deletion deeppavlov/dataset_readers/kvret_reader.py
@@ -111,6 +111,7 @@ def _check_dialog(dialog):
# return False
return True

+@staticmethod
def _filter_duplicates(dialog):
last_turn, last_utter = None, None
for turn in dialog:
@@ -121,7 +122,8 @@

@classmethod
def _iter_file(cls, file_path):
-data = json.load(open(file_path, 'rt'))
+with open(file_path, 'rt', encoding='utf8') as f:
+    data = json.load(f)
for i, sample in enumerate(data):
dialog = list(cls._filter_duplicates(sample['dialogue']))
if cls._check_dialog(dialog):
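Note: besides pinning the encoding, this hunk replaces `json.load(open(...))` with a `with` block, so the file handle is closed deterministically instead of waiting for the garbage collector. The same pattern as a helper:

```python
# Context-managed JSON loading: json.load(open(path)) leaves the handle open
# until GC (and can emit ResourceWarning); the with-block closes it at once.
import json

def load_json(path):
    with open(path, 'rt', encoding='utf8') as f:
        return json.load(f)
```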
3 changes: 2 additions & 1 deletion deeppavlov/dataset_readers/squad_dataset_reader.py
@@ -53,7 +53,8 @@ def read(self, dir_path: str, dataset='SQuAD'):

dataset = {}
for f in required_files:
-data = json.load((dir_path / f).open('r'))
+with dir_path.joinpath(f).open('r', encoding='utf8') as fp:
+    data = json.load(fp)
if f == 'dev-v1.1.json':
dataset['valid'] = data
else:
6 changes: 3 additions & 3 deletions deeppavlov/dataset_readers/typos_reader.py
@@ -45,7 +45,7 @@ def build(data_path: str):
@classmethod
def read(cls, data_path: str, *args, **kwargs):
fname = cls.build(data_path)
-with fname.open(newline='') as tsvfile:
+with fname.open(newline='', encoding='utf8') as tsvfile:
reader = csv.reader(tsvfile, delimiter='\t')
next(reader)
res = [(mistake, correct) for mistake, correct in reader]
@@ -73,7 +73,7 @@ def build(data_path: str):
data.append([typo.strip(), correct.strip()])

fname.parent.mkdir(parents=True, exist_ok=True)
-with fname.open('w', newline='') as tsvfile:
+with fname.open('w', newline='', encoding='utf8') as tsvfile:
writer = csv.writer(tsvfile, delimiter='\t')
for line in data:
writer.writerow(line)
@@ -108,7 +108,7 @@ def build(data_path: str):
@staticmethod
def read(data_path: str, *args, **kwargs):
fname = TyposKartaslov.build(data_path)
-with open(str(fname), newline='') as csvfile:
+with open(str(fname), newline='', encoding='utf8') as csvfile:
reader = csv.reader(csvfile, delimiter=';')
next(reader)
res = [(mistake, correct) for correct, mistake, weight in reader]
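Note: `newline=''` is the csv module's documented requirement (the reader handles line endings itself); these hunks only add the explicit encoding on top. A minimal round trip in the same style (file name and rows invented):

```python
# csv round trip with newline='' plus explicit encoding, mirroring
# TyposKartaslov.read above.
import csv

with open('typos_demo.csv', 'w', newline='', encoding='utf8') as f:
    csv.writer(f, delimiter=';').writerows(
        [('correct', 'mistake', 'weight'), ('привет', 'privet', '1')])

with open('typos_demo.csv', newline='', encoding='utf8') as f:
    reader = csv.reader(f, delimiter=';')
    next(reader)  # skip header
    print([(mistake, correct) for correct, mistake, weight in reader])
```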
2 changes: 1 addition & 1 deletion deeppavlov/evolve.py
@@ -277,7 +277,7 @@ def results_to_table(population, evolution, considered_metrics, result_file, res
for i in range(population_size):
with open(str(expand_path(Path(evolution.get_value_from_config(
population[i],
-evolution.main_model_path + ["save_path"])).parent.joinpath("out.txt"))), "r") as fout:
+evolution.main_model_path + ["save_path"])).parent.joinpath("out.txt"))), "r", encoding='utf8') as fout:
reports_data = fout.read().splitlines()[-2:]
reports = []
for j in range(2):
4 changes: 2 additions & 2 deletions deeppavlov/metrics/mrr_classification.py
@@ -29,7 +29,7 @@ def calc_mrr(rank):

def mrr_from_json(fname):
data = []
-with open(fname) as f:
+with open(fname, encoding='utf8') as f:
for line in f.readlines():
data += [json.loads(line)]
rank_i = []
Expand All @@ -55,7 +55,7 @@ def mrr_from_dict(data):

def make_json_predictions(fname, predictions):
data = []
-with open(fname) as f:
+with open(fname, encoding='utf8') as f:
for line in f.readlines():
data += [json.loads(line)]

2 changes: 1 addition & 1 deletion deeppavlov/models/embedders/dict_embedder.py
@@ -51,7 +51,7 @@ def load(self):
else:
log.info('Loading existing dictionary of embeddings from {}'.format(self.load_path))

-with open(str(self.load_path)) as fin:
+with open(self.load_path, encoding='utf8') as fin:
for line in fin:
values = line.rsplit(sep=' ', maxsplit=self.dim)
assert (len(values) == self.dim + 1)
2 changes: 1 addition & 1 deletion deeppavlov/models/embedders/glove_embedder.py
@@ -49,7 +49,7 @@ def load(self, *args, **kwargs):
"""

# Check that header with n_words emb_dim present
-with open(self.load_path) as f:
+with open(self.load_path, encoding='utf8') as f:
header = f.readline()
if len(header.split()) != 2:
raise RuntimeError('The GloVe file must start with number_of_words embeddings_dim line! '
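Note: the loader expects a word2vec-style first line, `number_of_words embeddings_dim`, which stock GloVe dumps lack. A sketch of the check it performs, against a tiny hand-made file (name and vectors invented):

```python
# Header check in the style of GloVeEmbedder.load above.
with open('glove_demo.txt', 'w', encoding='utf8') as f:
    f.write('2 3\nhello 0.1 0.2 0.3\nworld 0.4 0.5 0.6\n')

with open('glove_demo.txt', encoding='utf8') as f:
    header = f.readline()
    if len(header.split()) != 2:
        raise RuntimeError('The GloVe file must start with number_of_words embeddings_dim line!')
```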
2 changes: 1 addition & 1 deletion deeppavlov/models/evolution/Results_analysis.ipynb
@@ -37,7 +37,7 @@
"KEY_MAIN_MODEL = \"main\"\n",
"POPULATION_SIZE = 2\n",
" \n",
"with open(CONFIG_FILE, \"r\") as f:\n",
"with open(CONFIG_FILE, \"r\", encoding='utf8') as f:\n",
" basic_params = json.load(f)\n",
"\n",
"set_deeppavlov_root(basic_params)\n",
4 changes: 2 additions & 2 deletions deeppavlov/models/go_bot/network.py
@@ -330,13 +330,13 @@ def save(self, *args, **kwargs):
def save_params(self):
path = str(self.save_path.with_suffix('.json').resolve())
log.info('[saving parameters to {}]'.format(path))
-with open(path, 'w') as fp:
+with open(path, 'w', encoding='utf8') as fp:
json.dump(self.opt, fp)

def load_params(self):
path = str(self.load_path.with_suffix('.json').resolve())
log.info('[loading parameters from {}]'.format(path))
-with open(path, 'r') as fp:
+with open(path, 'r', encoding='utf8') as fp:
params = json.load(fp)
for p in self.GRAPH_PARAMS:
if self.opt.get(p) != params.get(p):
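Note: `save_params`/`load_params` persist the network's hyperparameters as JSON next to the checkpoint, and loading rejects a file whose graph-defining options disagree with the current config. A sketch of that guard — `GRAPH_PARAMS` here is an invented subset, not the real list from `go_bot/network.py`:

```python
# Hypothetical version of the load-time compatibility check.
import json

GRAPH_PARAMS = ['hidden_size', 'obs_size']  # assumed names, for illustration

def load_params(opt, path):
    with open(path, 'r', encoding='utf8') as fp:
        params = json.load(fp)
    for p in GRAPH_PARAMS:
        if opt.get(p) != params.get(p):
            raise ValueError('`{}` differs from the saved model'.format(p))
    return params
```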
9 changes: 5 additions & 4 deletions deeppavlov/models/go_bot/templates.py
@@ -175,13 +175,14 @@ def templates(self):
return self._templates

def load(self, filename):
-for ln in open(filename, 'r'):
-    act, template = ln.strip('\n').split('\t', 1)
-    self.__setitem__(act, self.ttype.from_str(template))
+with open(filename, 'r', encoding='utf8') as fp:
+    for ln in fp:
+        act, template = ln.strip('\n').split('\t', 1)
+        self.__setitem__(act, self.ttype.from_str(template))
return self

def save(self, filename):
-with open(filename, 'w') as outfile:
+with open(filename, 'w', encoding='utf8') as outfile:
for act in sorted(self.actions):
template = self.__getitem__(act)
outfile.write('{}\t{}\n'.format(act, template))
2 changes: 1 addition & 1 deletion deeppavlov/models/preprocessors/squad_preprocessor.py
@@ -227,7 +227,7 @@ def fit(self, contexts, questions, *args, **kwargs):
else:
raise RuntimeError("SquadVocabEmbedder::fit: Unknown level: {}".format(self.level))

-with (self.emb_folder / self.emb_file_name).open('r') as femb:
+with (self.emb_folder / self.emb_file_name).open('r', encoding='utf8') as femb:
emb_voc_size, self.emb_dim = map(int, femb.readline().split())
for line in tqdm(femb, total=emb_voc_size):
line_split = line.strip().split(' ')
