-
Notifications
You must be signed in to change notification settings - Fork 0
/
oked.py
74 lines (56 loc) · 1.86 KB
/
oked.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import luigi
import attr
from luigi.util import requires
from settings import TMP_DIR
from tasks.base import GzipToFtp, BaseConfig, WebExcelFileParsingToCsv
@attr.s
class Row:
    """A single classifier record; every field defaults to an empty string.

    ``lv0`` .. ``lv3`` start out empty and are filled in by ``update_rows``.
    NOTE(review): presumably ``code``/``namekz``/``nameru`` map to the three
    spreadsheet columns in that order -- confirm against the parser.
    """

    # Declaration order is kept as-is: attrs builds the positional
    # ``__init__`` signature from it.
    code = attr.ib(default="")
    namekz = attr.ib(default="")
    nameru = attr.ib(default="")

    # Parent levels derived from ``code``.
    lv0 = attr.ib(default="")
    lv1 = attr.ib(default="")
    lv2 = attr.ib(default="")
    lv3 = attr.ib(default="")
# Download address handed to GzipOkedToFtp as ``url`` in Oked.requires.
# NOTE(review): presumably serves the classifier spreadsheet -- confirm.
url = 'https://stat.gov.kz/api/getFile/?docId=ESTAT310324'
def update_rows(rows):
    """Drop rows without a code and fill in the parent levels of the rest.

    ``rows`` is changed in place: rows whose ``code`` is falsy are removed
    from the list, and each remaining row gets its ``code`` stripped of
    dots and its ``lv0`` .. ``lv3`` attributes set. Nothing is returned.

    A code with no dot that is not all digits (``A``, ``B``, ...) is a
    root. Every dotted or all-digit code that follows is prefixed with the
    most recent root before being split into levels, e.g. under root ``A``:

    * ``01``      -> lv0 ``A``
    * ``01.1``    -> lv0 ``A``, lv1 ``01``
    * ``01.11``   -> lv0 ``A``, lv1 ``01``, lv2 ``011``
    * ``01.11.0`` -> lv0 ``A``, lv1 ``01``, lv2 ``011``, lv3 ``0111``

    Args:
        rows: mutable list of objects with ``code`` and ``lv0`` .. ``lv3``
            attributes. An empty list is accepted and left untouched.
    """
    if not rows:
        return

    # Seed the root from the original first row (as before) so input that
    # does not open with a root row resolves exactly as it used to.
    curr_root = rows[0].code

    # Filter in place *before* iterating. Popping inside the loop shifted
    # the remaining items left, so the row right after every removed one
    # was skipped: left without levels, or kept even though it was empty.
    rows[:] = [row for row in rows if row.code]

    for row in rows:
        raw = row.code

        # Dotted / numeric codes hang off the current root; anything else
        # is itself a new root.
        if '.' in raw or raw.isdigit():
            full = f'{curr_root}.{raw}'
        else:
            full = raw
            curr_root = raw

        row.code = raw.replace('.', '')

        parts = full.split('.')
        depth = len(parts)
        if depth == 2:
            row.lv0 = parts[0]
        elif depth == 3:
            row.lv0, row.lv1 = parts[0], parts[1]
            # A one-character third part is itself the lv2 entry, so it
            # has no lv2 parent; longer ones sit one level deeper.
            if len(parts[2]) != 1:
                row.lv2 = f'{parts[1]}{parts[2][0]}'
        elif depth == 4:
            row.lv0, row.lv1 = parts[0], parts[1]
            row.lv2 = f'{parts[1]}{parts[2][0]}'
            row.lv3 = f'{parts[1]}{parts[2]}'
@requires(WebExcelFileParsingToCsv)
class GzipOkedToFtp(GzipToFtp):
    """GzipToFtp task declared to require WebExcelFileParsingToCsv.

    Adds no behaviour of its own; the upstream dependency is wired in by
    the ``requires`` decorator.
    """
class Oked(luigi.WrapperTask):
    """Wrapper task whose single requirement is a configured GzipOkedToFtp."""

    def requires(self):
        """Return the GzipOkedToFtp task set up for the ``sgov_oked`` dataset."""
        task_params = {
            'url': url,
            'monthly': True,
            'name': 'sgov_oked',
            'struct': Row,
            'directory': TMP_DIR,
            'skiptop': 3,
            'usecolumns': 'A:C',
            # Rows are post-processed (levels filled in) by update_rows.
            'transform_callback': update_rows,
        }
        return GzipOkedToFtp(**task_params)
if __name__ == "__main__":
    # Executed as a script: hand control to luigi's command-line runner.
    luigi.run()