-
Notifications
You must be signed in to change notification settings - Fork 2
/
utils.py
62 lines (54 loc) · 1.88 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import csv, codecs
try:
from cStringIO import StringIO # CPython 2.x
except ImportError:
try:
from StringIO import StringIO # possibly other interpretors
except ImportError:
from io import StringIO # Python 3
import os
import sys
from gzip import GzipFile
from bz2 import BZ2File
# Registry mapping a filename extension to the class used to open files
# carrying that extension.
COMPRESSED_FILE_CLASSES = {
    '.gz': GzipFile,
    '.bz2': BZ2File,
}

# The lzma module is optional: register '.xz' only when it can be imported.
try:
    from lzma import LZMAFile
except ImportError:
    pass
else:
    COMPRESSED_FILE_CLASSES['.xz'] = LZMAFile
def open_compressed_file(filename):
    """Return a file object for *filename*, which may be compressed.

    The special name '-' yields ``sys.stdin``. Otherwise the filename's
    extension is looked up in ``COMPRESSED_FILE_CLASSES``: a registered
    extension is opened with the matching class, anything else with the
    builtin ``open``. Only the filename is passed to the opener, so the
    opener's own default mode applies.
    """
    if filename == '-':
        return sys.stdin

    _, suffix = os.path.splitext(filename)
    opener = COMPRESSED_FILE_CLASSES.get(suffix, open)
    handle = opener(filename)
    # workaround for https://bugzilla.redhat.com/show_bug.cgi?id=720111
    dir(handle)
    return handle
# taken from http://docs.python.org/library/csv.html
class UnicodeWriter:
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.

    Each row is formatted by ``csv.writer`` into an in-memory queue, then
    re-encoded into the target encoding and written to "f". The data handed
    to ``f.write`` is therefore already encoded, so on Python 3 "f" must
    accept bytes (e.g. a file opened in binary mode).

    Works on both Python 2 and Python 3.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        """Set up the writer.

        f        -- output stream; receives the encoded data via ``write``
        dialect  -- csv dialect passed to ``csv.writer``
        encoding -- name of the target encoding of the output
        kwds     -- extra keyword arguments passed to ``csv.writer``
        """
        # Redirect output to a queue
        self.queue = StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        """Format one row as CSV, encode it and write it to the stream."""
        if sys.version_info[0] < 3:
            # Python 2: the csv module only handles byte strings, so the
            # cells take a round trip through UTF-8.
            self.writer.writerow([s.encode("utf-8") for s in row])
            # Fetch UTF-8 output from the queue ...
            data = self.queue.getvalue().decode("utf-8")
        else:
            # Python 3: the csv module is text-native; encoding the cells
            # first would write their b'...' repr instead of their content.
            self.writer.writerow(row)
            data = self.queue.getvalue()
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue; rewind first, because truncate() alone does not move
        # the position of an io.StringIO and the next row would be padded
        self.queue.seek(0)
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write every row of the iterable *rows* using ``writerow``."""
        for row in rows:
            self.writerow(row)