-
-
Notifications
You must be signed in to change notification settings - Fork 786
/
base_formats.py
231 lines (173 loc) · 5.88 KB
/
base_formats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import warnings
import tablib
from django.conf import settings
from tablib.formats import registry
class Format:
def get_title(self):
return type(self)
def create_dataset(self, in_stream):
"""
Create dataset from given string.
"""
raise NotImplementedError()
def export_data(self, dataset, **kwargs):
"""
Returns format representation for given dataset.
"""
raise NotImplementedError()
def is_binary(self):
"""
Returns if this format is binary.
"""
return True
def get_read_mode(self):
"""
Returns mode for opening files.
"""
return "rb"
def get_extension(self):
"""
Returns extension for this format files.
"""
return ""
def get_content_type(self):
# For content types see
# https://www.iana.org/assignments/media-types/media-types.xhtml
return "application/octet-stream"
@classmethod
def is_available(cls):
return True
def can_import(self):
return False
def can_export(self):
return False
class TablibFormat(Format):
TABLIB_MODULE = None
CONTENT_TYPE = "application/octet-stream"
def __init__(self, encoding=None):
self.encoding = encoding
def get_format(self):
"""
Import and returns tablib module.
"""
if not self.TABLIB_MODULE:
raise AttributeError("TABLIB_MODULE must be defined")
key = self.TABLIB_MODULE.split(".")[-1].replace("_", "")
return registry.get_format(key)
@classmethod
def is_available(cls):
try:
cls().get_format()
except (tablib.core.UnsupportedFormat, ImportError):
return False
return True
def get_title(self):
return self.get_format().title
def create_dataset(self, in_stream, **kwargs):
return tablib.import_set(in_stream, format=self.get_title(), **kwargs)
def export_data(self, dataset, **kwargs):
if getattr(settings, "IMPORT_EXPORT_ESCAPE_FORMULAE_ON_EXPORT", False) is True:
self._escape_formulae(dataset)
return dataset.export(self.get_title(), **kwargs)
def get_extension(self):
return self.get_format().extensions[0]
def get_content_type(self):
return self.CONTENT_TYPE
def can_import(self):
return hasattr(self.get_format(), "import_set")
def can_export(self):
return hasattr(self.get_format(), "export_set")
def _escape_formulae(self, dataset):
def _do_escape(s):
return s.replace("=", "", 1) if s.startswith("=") else s
for _ in dataset:
row = dataset.lpop()
row = [_do_escape(str(cell)) for cell in row]
dataset.append(row)
class TextFormat(TablibFormat):
def create_dataset(self, in_stream, **kwargs):
if isinstance(in_stream, bytes) and self.encoding:
in_stream = in_stream.decode(self.encoding)
return super().create_dataset(in_stream, **kwargs)
def get_read_mode(self):
return "r"
def is_binary(self):
return False
class CSV(TextFormat):
TABLIB_MODULE = "tablib.formats._csv"
CONTENT_TYPE = "text/csv"
class JSON(TextFormat):
TABLIB_MODULE = "tablib.formats._json"
CONTENT_TYPE = "application/json"
class YAML(TextFormat):
TABLIB_MODULE = "tablib.formats._yaml"
# See https://stackoverflow.com/questions/332129/yaml-mime-type
CONTENT_TYPE = "text/yaml"
class TSV(TextFormat):
TABLIB_MODULE = "tablib.formats._tsv"
CONTENT_TYPE = "text/tab-separated-values"
class ODS(TextFormat):
TABLIB_MODULE = "tablib.formats._ods"
CONTENT_TYPE = "application/vnd.oasis.opendocument.spreadsheet"
class HTML(TextFormat):
TABLIB_MODULE = "tablib.formats._html"
CONTENT_TYPE = "text/html"
class XLS(TablibFormat):
TABLIB_MODULE = "tablib.formats._xls"
CONTENT_TYPE = "application/vnd.ms-excel"
def create_dataset(self, in_stream):
"""
Create dataset from first sheet.
"""
import xlrd
xls_book = xlrd.open_workbook(file_contents=in_stream)
dataset = tablib.Dataset()
sheet = xls_book.sheets()[0]
dataset.headers = sheet.row_values(0)
for i in range(1, sheet.nrows):
dataset.append(sheet.row_values(i))
return dataset
class XLSX(TablibFormat):
TABLIB_MODULE = "tablib.formats._xlsx"
CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
def create_dataset(self, in_stream):
"""
Create dataset from first sheet.
"""
from io import BytesIO
import openpyxl
# 'data_only' means values are read from formula cells, not the formula itself
xlsx_book = openpyxl.load_workbook(
BytesIO(in_stream), read_only=True, data_only=True
)
dataset = tablib.Dataset()
sheet = xlsx_book.active
# obtain generator
rows = sheet.rows
dataset.headers = [cell.value for cell in next(rows)]
for row in rows:
row_values = [cell.value for cell in row]
dataset.append(row_values)
return dataset
def export_data(self, dataset, **kwargs):
# #1698 temporary catch for deprecation warning in openpyxl
# this catch block must be removed when openpyxl updated
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=DeprecationWarning)
return super().export_data(dataset, **kwargs)
#: These are the default formats for import and export. Whether they can be
#: used or not is depending on their implementation in the tablib library.
DEFAULT_FORMATS = [
fmt
for fmt in (
CSV,
XLS,
XLSX,
TSV,
ODS,
JSON,
YAML,
HTML,
)
if fmt.is_available()
]