Skip to content

Commit

Permalink
Исправил разбор формулы контроля/условия и проверку дубликатов строк
Browse files Browse the repository at this point in the history
  • Loading branch information
WoolenSweater committed Aug 2, 2021
1 parent c454c2b commit 2c53542
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 38 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# CHANGELOG

### [1.2.3] - 2021-08-02

- Исправил разбор формулы контроля/условия. В них также есть коды вида "01", тогда как в отчёте в коде нет ведущего нуля.
- Исправил проверку дубликатов строк. Ошибкой было считать дубли по всему отчёту. Теперь они считаются в рамках каждого раздела.


### [1.2.2] - 2021-07-30

- Исправил ошибку, из-за которой значение, которое должно было быть NULL и заменяться методом isnull, не заменялось.
Expand Down
2 changes: 1 addition & 1 deletion docs/docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
## 3. Проверка формата

* 3.1 Раздел `<код секции>` отсутствует в отчёте
* 3.2 Строка `<код строки>` повторяется `<кол-во>` раз(а)
* 3.2 Раздел `<код секции>`, строка `<код строки>` повторяется `<кол-во>` раз(а)
* 3.3 Раздел `<код секции>`, строка `<код строки>` не может быть пустой
* 3.4 Раздел `<код секции>`, строка `<код строки>`, графа `<код графы>` не может быть пустой
* 3.5 Раздел `<код секции>` не описан в шаблоне
Expand Down
7 changes: 7 additions & 0 deletions rosstat/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from collections import defaultdict


def str_int(v):
    '''Normalize a numeric code string by dropping its leading zeros.

    A string made up solely of decimal digits is round-tripped through
    int(), so "01" becomes "1" and "000" becomes "0". Any other string
    (empty, alphabetic, mixed) is returned unchanged.

    :param v: code string to normalize
    :return: the normalized string, or ``v`` itself when it is not numeric
    '''
    # str.isdigit() is also true for characters such as '²', which int()
    # rejects with ValueError. str.isdecimal() only accepts characters that
    # int() is able to parse, so the guard and the conversion agree.
    return str(int(v)) if v.isdecimal() else v


class SchemaFormats(dict):
def _get_spec_code(self, sec_code, spec_idx):
'''Возвращает из указаной секции ключ специфики по её индексу'''
Expand Down Expand Up @@ -42,6 +46,9 @@ def __init__(self):
# Iterate over the distinct keys in ascending numeric order: duplicates are
# dropped with set() and the remaining keys are sorted by their int() value.
def __iter__(self):
return iter(sorted(set(self.keys), key=int))

# Debug representation: shows the list of (key, value) pairs produced by
# zipping self.keys with self.values.
def __repr__(self):
return '<MultiDict {}>'.format(list(zip(self.keys, self.values)))

def add(self, key, value):
self.keys.append(key)
self.values.append(value)
Expand Down
17 changes: 8 additions & 9 deletions rosstat/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
from collections import defaultdict as defdict
from dataclasses import dataclass, InitVar, field as f
from lxml.etree import _ElementTree
from .helpers import MultiDict
from .schema import str_int
from .helpers import MultiDict, str_int


def max_divider(num, terms):
Expand Down Expand Up @@ -65,6 +64,11 @@ def filter(self, specs):
class Section:
code: str
rows: MultiDict = f(default_factory=MultiDict)
_row_counters: defdict = f(default_factory=lambda: defdict(int))

@property
def row_counters(self):
return self._row_counters

def items(self, codes=None):
'''Итерация по элементам раздела'''
Expand All @@ -77,8 +81,9 @@ def get_rows(self, code):
return self.rows.getall(code) or [Row(code, None, None, None)]

def add_row(self, row_code, row):
'''Добавление строки в раздел'''
'''Добавление строки в раздел и приращение счётчика'''
self.rows.add(row_code, row)
self._row_counters[(row_code, row.s1, row.s2, row.s3)] += 1


@dataclass
Expand All @@ -91,7 +96,6 @@ class Report:
_period_raw: str = None
_period_type: Optional[str] = None
_period_code: Optional[str] = None
_row_counters: defdict = f(default_factory=lambda: defdict(int))

def __repr__(self):
return '<Report title={_title}\ndata={_data}>'.format(**self.__dict__)
Expand Down Expand Up @@ -123,10 +127,6 @@ def period_type(self):
def period_code(self):
return self._period_code

@property
def row_counters(self):
return self._row_counters

def items(self):
'''Итерация по разделам отчёта'''
for sec_code, section in self._data.items():
Expand Down Expand Up @@ -161,7 +161,6 @@ def _read_data(self, xml):
row.add_col(col_code, col.text)
self._blank = False
section.add_row(row_code, row)
self._row_counters[(row_code, row.s1, row.s2, row.s3)] += 1
data[section_code] = section
return data

Expand Down
6 changes: 1 addition & 5 deletions rosstat/schema.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
import traceback
from collections import defaultdict
from .helpers import SchemaFormats, NestedDefaultdict
from .helpers import SchemaFormats, NestedDefaultdict, str_int
from .validators import (AttrValidator, TitleValidator,
FormatValidator, ControlValidator)


def str_int(v):
return str(int(v)) if v.isdigit() else v


class Schema:
def __init__(self, xml_tree, *, skip_warns):
self.xml = xml_tree
Expand Down
19 changes: 7 additions & 12 deletions rosstat/validators/control/parser/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from functools import reduce
from .value import nullablefloat
from ..exceptions import NoElemToCompareError
from ....helpers import str_int

operator_map = {
'<': operator.lt,
Expand Down Expand Up @@ -51,10 +52,7 @@ def __neg__(self):

def __repr__(self):
return ('<Elem {}{}{} value={} bool={}>').format(
sorted(int(i) for i in self.section),
sorted(int(i) for i in self.rows if i.isdigit()),
sorted(int(i) for i in self.columns if i.isdigit()),
self.val, self.bool)
self.section, self.rows, self.columns, self.val, self.bool)

def __modify(self, elem, op_func):
self.rows |= elem.rows
Expand Down Expand Up @@ -126,9 +124,9 @@ def add_func(self, func, arg):
class ElemList:
def __init__(self, section, rows, columns,
s1=[None], s2=[None], s3=[None]):
self.section = section[0]
self.rows = set(rows)
self.columns = set(columns)
self.section = list(str_int(v) for v in section).pop()
self.rows = set(str_int(v) for v in rows)
self.columns = set(str_int(v) for v in columns)

self.specs = {1: s1, 2: s2, 3: s3}

Expand All @@ -137,10 +135,7 @@ def __init__(self, section, rows, columns,

def __repr__(self):
return '<ElemList [{}]{}{} funcs={} elems={}>'.format(
self.section,
sorted(int(i) for i in self.rows if i.isdigit()),
sorted(int(i) for i in self.columns if i.isdigit()),
self.funcs, self.elems)
self.section, self.rows, self.columns, self.funcs, self.elems)

def __neg__(self):
self._apply_unary('neg')
Expand Down Expand Up @@ -220,7 +215,7 @@ def _proc_row(self, raw_row, row_code, dimension):
'''
row = []
for col_code, value in self._read_columns(raw_row, dimension):
row.append(Elem(value, self.section, [row_code], [col_code]))
row.append(Elem(value, self.section, row_code, col_code))
return row

def _apply_funcs(self, report, params, ctx_elem):
Expand Down
5 changes: 3 additions & 2 deletions rosstat/validators/format/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ def __init__(self, sec_code):


class DuplicateError(FormatError):
def __init__(self, row_code, counter):
def __init__(self, sec_code, row_code, counter):
self.code = '2'
self.msg = 'Строка {} повторяется {} раз(а)'.format(row_code, counter)
self.msg = ('Раздел {}, cтрока {} повторяется {} раз(а)'
.format(sec_code, row_code, counter))


class EmptyRowError(FormatError):
Expand Down
15 changes: 7 additions & 8 deletions rosstat/validators/format/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,13 @@ def _check_duplicates(self, report):
def __fmt_specs(specs):
return ' '.join(f's{i}={s}' for i, s in enumerate(specs, 1) if s)

for row, counter in report.row_counters.items():
if counter > 1:
row_code, *specs = row
if any(specs):
row = f'{row_code} {__fmt_specs(specs)}'
else:
row = row_code
raise DuplicateError(row, counter)
for sec_code, section in report.items():
for row, counter in section.row_counters.items():
if counter > 1:
row_code, *specs = row
if any(specs):
row_code = f'{row_code} {__fmt_specs(specs)}'
raise DuplicateError(sec_code, row_code, counter)

def _check_required(self, report):
'''Проверка наличия обязательных к заполнению строк и значений'''
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='rosstat-flc',
version='1.2.2',
version='1.2.3',
packages=find_packages(),
description='Tool for format-logistic control of reports sent to RosStat',
long_description=open('README.md', 'r').read(),
Expand Down

0 comments on commit 2c53542

Please sign in to comment.