forked from AlertaDengue/PySUS
-
Notifications
You must be signed in to change notification settings - Fork 12
/
readdbc.py
110 lines (93 loc) · 3.18 KB
/
readdbc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
u"""
Created on 16/08/16
by fccoelho
license: GPL V3 or Later
"""
import os
import csv
import gzip
from tqdm import tqdm
from io import BytesIO
from tempfile import NamedTemporaryFile
import geopandas as gpd
import pandas as pd
from dbfread import DBF
try:
from pysus.utilities._readdbc import ffi, lib
except (ImportError, ModuleNotFoundError):
from _readdbc import ffi, lib
def read_dbc(filename, encoding="utf-8", raw=False):
    """
    Opens a DATASUS .dbc file and return its contents as a pandas
    Dataframe.
    :param filename: .dbc filename (str or bytes)
    :param encoding: encoding of the data
    :param raw: Skip type conversion. Set it to True to avoid type conversion errors
    :return: GeoDataFrame (pandas-compatible) with the file contents.
    """
    if isinstance(filename, str):
        filename = filename.encode()
    with NamedTemporaryFile(delete=False) as tf:
        dbc2dbf(filename, tf.name.encode())
    # try/finally guarantees the temporary DBF is removed even when parsing
    # raises; the original only unlinked on the fall-through path, leaking
    # the temp file if the second DBF attempt also failed.
    try:
        try:
            dbf = DBF(tf.name, encoding=encoding, raw=raw)
            df = gpd.GeoDataFrame(list(dbf))
        except ValueError:
            # Some files only parse with (or without) dbfread's type
            # conversion; retry once with the opposite `raw` setting.
            dbf = DBF(tf.name, encoding=encoding, raw=not raw)
            df = gpd.GeoDataFrame(list(dbf))
    finally:
        os.unlink(tf.name)
    return df
def dbc2dbf(infile, outfile):
    """
    Converts a DATASUS dbc file to a DBF database.
    :param infile: .dbc file name (str or bytes)
    :param outfile: name of the .dbf file to be created (str or bytes)
    """
    # The compiled helper expects C strings, so normalize str paths to bytes.
    if isinstance(infile, str):
        infile = infile.encode()
    if isinstance(outfile, str):
        outfile = outfile.encode()
    # Pass absolute paths so the C code is independent of the process CWD.
    # NOTE(review): os.path.abspath on bytes yields bytes, which is what
    # ffi.new("char[]", ...) requires.
    p = ffi.new("char[]", os.path.abspath(infile))
    q = ffi.new("char[]", os.path.abspath(outfile))
    # The C entry point takes char** arguments, hence the one-element lists.
    lib.dbc2dbf([p], [q])
def read_dbc_geopandas(filename, encoding="utf-8"):
    """
    Opens a DATASUS .dbc file and return its contents as a pandas
    Dataframe, using geopandas.
    :param filename: .dbc filename
    :param encoding: encoding of the data
    :return: Pandas Dataframe.
    """
    # (The original had a no-op `filename = filename` normalization here;
    # dbc2dbf already accepts both str and bytes.)
    with NamedTemporaryFile(delete=False) as tf:
        out = tf.name + ".dbf"
    try:
        dbc2dbf(filename, out)
        # geopandas attaches a geometry column on read; drop it so callers
        # get plain tabular data.
        dbf = gpd.read_file(out, encoding=encoding).drop("geometry", axis=1)
        df = pd.DataFrame(dbf)
    finally:
        # Remove both scratch files even on failure; guard with exists()
        # because `out` is only created if dbc2dbf succeeded.
        for path in (out, tf.name):
            if os.path.exists(path):
                os.unlink(path)
    return df
def read_dbc_dbf(filename: str):
    """
    Read a DATASUS .dbc or .dbf file into a DataFrame, dispatching on the
    file extension (case-insensitive).
    :param filename: path ending in .dbc or .dbf
    :return: Pandas Dataframe.
    :raises ValueError: if the extension is neither dbc nor dbf.
    """
    lowered = filename.lower()
    if lowered.endswith("dbc"):
        df = read_dbc(filename, encoding="iso-8859-1")
    elif lowered.endswith("dbf"):
        dbf = DBF(filename, encoding="iso-8859-1")
        df = pd.DataFrame(list(dbf))
    else:
        # The original fell through to an UnboundLocalError on `df`;
        # fail with a clear message instead.
        raise ValueError(
            f"Unsupported file type: {filename!r} (expected .dbc or .dbf)"
        )
    return df
def dbf_to_csvgz(filename: str, encoding: str = 'iso-8859-1'):
    """
    Streams a dbf file to a gzipped CSV file. The gzipped CSV will be saved
    on the same path but with a .csv.gz extension.
    :param filename: path to the dbf file
    :param encoding: encoding of the dbf file
    """
    data = DBF(filename, encoding=encoding, raw=False)
    fn = os.path.splitext(filename)[0] + '.csv.gz'
    # newline='' per the csv module docs: the writer emits its own \r\n
    # terminators, so the text layer must not translate newlines.
    with gzip.open(fn, 'wt', newline='') as gzf:
        csvwriter = None
        for record in tqdm(data, desc='Converting'):
            if csvwriter is None:
                # Lazily create the writer: field names come from the
                # first record, and the header is written exactly once.
                csvwriter = csv.DictWriter(gzf, fieldnames=record.keys())
                csvwriter.writeheader()
            csvwriter.writerow(record)