Skip to content

Commit

Permalink
* fixed a lot of issues due to inconsistencies of the code initially
Browse files Browse the repository at this point in the history
    written for Python2, mainly confusions between byte strings
    and unicode strings. The new code tries to use byte strings for
    file-like streams and unicode strings for in-memory objects.
  * added JavaDoc strings to document thoroughly functions and methods,
     mainly in odf/opendocument.py
  * added a config.dox file to allow one to build a documentation in HTML
    and LaTeX, for library developers and users.
  * added many assert() clauses to ensure the types of parameters in
    functions and methods
  * modified the utilities to make them usable with both Python2 and Python3
  * implemented a feature announced in csv2ods man pages but not previously
    active: -c / --encoding switch to take into account the encoding of
    the CSV source file.
  * added rules to build the developer's documentation and install it in
    usr/share/python-odf/API-doc/html
  * added a dependency python-odf-doc -> libjs-jquery necessary because
    of HTML code output by Doxygen
  *

 -- Georges Khaznadar <georgesk@debian.org>  Tue, 28 Oct 2014 10:41:32 +0100
  • Loading branch information
georgesk committed Oct 28, 2014
1 parent 8eb8d7b commit a6dc447
Show file tree
Hide file tree
Showing 40 changed files with 1,301 additions and 767 deletions.
10 changes: 9 additions & 1 deletion csv2ods/Makefile
Expand Up @@ -9,6 +9,14 @@ txt: csv2ods.txt
xmlto txt $<

clean:
rm -f *.txt odf
rm -f *.1 *~ *.txt odf test.csv test2.ods test3.ods
odf:
ln -s ../odf

test: clean odf
@echo 1,2,3,4 > test.csv
@echo 5,6,7,8 >> test.csv
@echo 9,10,11,12 >> test.csv
@python2 csv2ods -i test.csv -o test2.ods
@python3 csv2ods -i test.csv -o test3.ods
@echo created files test.csv, test2.ods and test3.ods
335 changes: 201 additions & 134 deletions csv2ods/csv2ods
Expand Up @@ -16,147 +16,214 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
# Søren Roug
#
# Oct 2014: Georges Khaznadar <georgesk@debian.org>
# - ported to Python3
# - implemented the missing switch -c / --encoding, with an extra
# feature for POSIX platforms which can guess encoding.

from odf.opendocument import OpenDocumentSpreadsheet
from odf.style import Style, TextProperties, ParagraphProperties, TableColumnProperties
from odf.text import P
from odf.table import Table, TableColumn, TableRow, TableCell
from optparse import OptionParser
import sys,csv,re, os
import sys,csv,re, os, codecs

if sys.version_info.major==3: unicode=str

if sys.version_info.major==2:
class UTF8Recoder:
    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8

    @param f a file-like object which delivers byte strings
    @param encoding the name of the codec used to decode f
    """
    def __init__(self, f, encoding):
        # codecs.getreader() returns a stream reader class for the codec;
        # wrapping f with it makes the iteration yield decoded lines
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        """
        @return the next line of the stream, as a UTF-8 encoded byte string
        """
        # the builtin next() supports the iterator protocol of Python2
        # as well as the one of Python3, whereas a direct call to
        # self.reader.next() is only possible with Python2
        return next(self.reader).encode("utf-8")

    # spelling of the iterator protocol for Python3
    __next__ = next

class UnicodeReader:
    """
    CSV reader for the stream "f", whose contents are written with the
    given encoding; iterating over it yields every record as a list of
    unicode strings.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # the csv module gets its input through UTF8Recoder, which
        # reencodes the lines of f to UTF-8
        recoded = UTF8Recoder(f, encoding)
        self.reader = csv.reader(recoded, dialect=dialect, **kwds)

    def __iter__(self):
        return self

    def next(self):
        # the cells come back as UTF-8 byte strings: convert each of
        # them to unicode
        cells = []
        for cell in self.reader.next():
            cells.append(unicode(cell, "utf-8"))
        return cells


def csvToOds( pathFileCSV, pathFileODS, tableName='table',
              delimiter=',', quoting=csv.QUOTE_MINIMAL,
              quotechar = '"', escapechar = None,
              skipinitialspace = False, lineterminator = '\r\n',
              encoding="utf-8"):
    """
    Converts a CSV file to an OpenDocument spreadsheet with a single table.

    Every CSV field becomes one cell. A field which looks like a plain
    decimal number (optional sign, digits, optional fractional part,
    surrounding blanks allowed) yields a cell of value type "float";
    any other field yields a cell of value type "string". In both cases
    the raw text of the field is kept as a paragraph inside the cell.

    @param pathFileCSV path of the CSV file to read
    @param pathFileODS path of the spreadsheet to write
    @param tableName name given to the table in the spreadsheet
    @param delimiter passed to the csv reader
    @param quoting passed to the csv reader
    @param quotechar passed to the csv reader
    @param escapechar passed to the csv reader
    @param skipinitialspace passed to the csv reader
    @param lineterminator passed to the csv reader
    @param encoding the encoding of the CSV file
    """
    textdoc = OpenDocumentSpreadsheet()
    # Create a style for the table content. One we can modify
    # later in the word processor.
    tablecontents = Style(name="Table Contents", family="paragraph")
    tablecontents.addElement(ParagraphProperties(numberlines="false", linenumber="0"))
    tablecontents.addElement(TextProperties(fontweight="bold"))
    textdoc.styles.addElement(tablecontents)

    # Start the table
    table = Table( name=tableName )

    # raw string: \s and \d are meant for the re module, they are not
    # escape sequences of a Python string
    fltExp = re.compile(r'^\s*[-+]?\d+(\.\d+)?\s*$')

    csvOptions = dict(delimiter=delimiter,
                      quoting=quoting,
                      quotechar=quotechar,
                      escapechar=escapechar,
                      skipinitialspace=skipinitialspace,
                      lineterminator=lineterminator)

    isPython3 = sys.version_info.major==3
    if isPython3:
        # the csv module of Python3 reads text and requires newline=''
        # to handle by itself the line ends embedded in quoted fields
        csvFile = open(pathFileCSV, encoding=encoding, newline='')
    else:
        # the csv module of Python2 reads bytes; UnicodeReader deals
        # with the decoding
        csvFile = open(pathFileCSV, 'rb')

    # the file is closed even when a row cannot be decoded or parsed
    try:
        if isPython3:
            reader = csv.reader(csvFile, **csvOptions)
        else:
            reader = UnicodeReader(csvFile, encoding=encoding, **csvOptions)

        for row in reader:
            tr = TableRow()
            table.addElement(tr)
            for val in row:
                if fltExp.match(val):
                    tc = TableCell(valuetype="float", value=val.strip())
                else:
                    tc = TableCell(valuetype="string")
                tr.addElement(tc)
                p = P(stylename=tablecontents,text=val)
                tc.addElement(p)
    finally:
        csvFile.close()

    textdoc.spreadsheet.addElement(table)
    textdoc.save( pathFileODS )

def encodingFromFileOutput(output):
    """
    Deduces an encoding from the text printed by the file(1) utility.

    Only the ISO-8859 family is told apart: the other words which file(1)
    may print before "text" (for example "CSV" or "ASCII") are not
    reliable codec names, so they all lead to the default encoding.

    @param output the line printed by file(1), as a unicode string
    @return "latin-1" when an ISO-8859 text is reported, else "utf-8"
    """
    # the optional tail accepts qualifiers such as
    # ", with CRLF line terminators" after the word "text"
    m = re.match(r'^.*: ([-a-zA-Z0-9]+) text(?:,.*)?$', output)
    if m and 'ISO-8859' in m.group(1):
        return "latin-1"
    return "utf-8"


def guessEncoding(pathFileCSV):
    """
    Tries to guess the encoding of a file; this is implemented only with
    POSIX platforms, where /usr/bin/file is available.

    @param pathFileCSV path of the file to examine
    @return "latin-1" or "utf-8"; "utf-8" when no guess is possible
    """
    import subprocess
    try:
        # an argument list is used rather than a shell command line, so
        # blanks or shell metacharacters in the path are harmless; "--"
        # prevents a path beginning with "-" to be taken for an option
        proc = subprocess.Popen(['/usr/bin/file', '--', pathFileCSV],
                                stdout=subprocess.PIPE)
        output = proc.communicate()[0]
    except OSError:
        # /usr/bin/file cannot be run on this platform
        return "utf-8"
    return encodingFromFileOutput(output.decode("utf-8", "replace"))


if __name__ == "__main__":
    usage = "%prog -i file.csv -o file.ods -d"
    parser = OptionParser(usage=usage, version="%prog 0.1")
    parser.add_option('-i','--input', action='store',
            dest='input', help='File input in csv')
    parser.add_option('-o','--output', action='store',
            dest='output', help='File output in ods')
    parser.add_option('-d','--delimiter', action='store',
            dest='delimiter', help='specifies a one-character string to use as the field separator. It defaults to ",".')

    parser.add_option('-c','--encoding', action='store',
            dest='encoding', help='specifies the encoding the file csv. It defaults to utf-8')

    parser.add_option('-t','--table', action='store',
            dest='tableName', help='The table name in the output file')

    # NOTE(review): no action is given, so optparse expects a value after
    # -s; action='store_true' was presumably intended -- confirm before
    # changing the command line interface
    parser.add_option('-s','--skipinitialspace',
            dest='skipinitialspace', help='''specifies how to interpret whitespace which
immediately follows a delimiter. It defaults to False, which
means that whitespace immediately following a delimiter is part
of the following field.''')

    parser.add_option('-l','--lineterminator', action='store',
            dest='lineterminator', help='''specifies the character sequence which should
terminate rows.''')

    # NOTE(review): this option is parsed but its value is not forwarded
    # to csvToOds() below -- confirm whether it should be
    parser.add_option('-q','--quoting', action='store',
            dest='quoting', help='''It can take on any of the following module constants:
0 = QUOTE_MINIMAL means only when required, for example, when a field contains either the quotechar or the delimiter
1 = QUOTE_ALL means that quotes are always placed around fields.
2 = QUOTE_NONNUMERIC means that quotes are always placed around fields which do not parse as integers or floating point numbers.
3 = QUOTE_NONE means that quotes are never placed around fields.
It defaults is QUOTE_MINIMAL''')

    parser.add_option('-e','--escapechar', action='store',
            dest='escapechar', help='''specifies a one-character string used to escape the delimiter when quoting is set to QUOTE_NONE.''')

    parser.add_option('-r','--quotechar', action='store',
            dest='quotechar', help='''specifies a one-character string to use as the quoting character. It defaults to ".''')

    (options, args) = parser.parse_args()

    # both file names are mandatory
    if not (options.input and options.output):
        parser.print_help()
        # sys.exit rather than exit: the latter is only provided by the
        # site module
        sys.exit( 0 )

    pathFileCSV = options.input
    pathFileODS = options.output

    # an option which is missing or empty gets its default value
    delimiter = options.delimiter or ","
    skipinitialspace = bool(options.skipinitialspace)
    lineterminator = options.lineterminator or "\r\n"
    escapechar = options.escapechar or None
    tableName = options.tableName or "table"
    quotechar = options.quotechar or "\""

    # when the -c or --encoding switch is used, it takes precedence
    # over the guess
    encoding = options.encoding or guessEncoding(pathFileCSV)

    csvToOds( pathFileCSV=unicode(pathFileCSV),
              pathFileODS=unicode(pathFileODS),
              delimiter=delimiter, skipinitialspace=skipinitialspace,
              escapechar=escapechar,
              lineterminator=unicode(lineterminator),
              tableName=tableName, quotechar=quotechar,
              encoding=encoding)

# Local Variables: ***
# mode: python ***
# End: ***
9 changes: 8 additions & 1 deletion mailodf/Makefile
Expand Up @@ -9,6 +9,13 @@ txt: mailodf.txt
xmlto txt $<

clean:
rm -f *.1 *.txt odf
rm -f *.1 *~ *.txt odf
odf:
ln -s ../odf

test: clean odf
@echo -n "Please input your local email for the test > "; read to; \
python2 mailodf -f python2@python.org -s "F.Y.I" ../tests/examples/ol.odp "$$to"; \
python3 mailodf -f python3@python.org -s "F.Y.I" ../tests/examples/ol.odp "$$to"
@echo 'You should receive two e-mails, one from python2@python.org'
@echo 'and the second from python3@python.org, with subjects: "F.Y.I"'

0 comments on commit a6dc447

Please sign in to comment.