* fixed a lot of issues due to inconsistencies of the code initially

written for Python2, mainly confusions between byte strings and unicode strings. The new code tries to use byte strings for file-like streams and unicode strings for in-memory objects. * added JavaDoc strings to document thoroughly functions and methods, mainly in odf/opendocument.py * added a config.dox file to allow one to build a documentation in HTML and LaTeX, for library developpers and users. * added many assert() clauses to ensure the types of parameters in functions and methods * modified the utilities to make them usable with both Python2 and Python3 * implemented a feature announced in csv2ods manfiles but not previously active: -c / --encoding switch to take in account the encoding of the CSV source file. * added rules to build the developer's documentation and install it in usr/share/python-odf/API-doc/html * added a dependency python-odf-doc -> libjs-jquery necessary because of HTML code output by Doxygen * -- Georges Khaznadar <georgesk@debian.org> Tue, 28 Oct 2014 10:41:32 +0100
eea · Oct 28, 2014 · a6dc447 · a6dc447
1 parent 8eb8d7b
commit a6dc447
Show file tree

Hide file tree

Showing 40 changed files with 1,301 additions and 767 deletions.
diff --git a/csv2ods/Makefile b/csv2ods/Makefile
@@ -9,6 +9,14 @@ txt: csv2ods.txt
 	xmlto txt $<
 
 clean:
-	rm -f *.txt odf
+	rm -f *.1 *~ *.txt odf test.csv test2.ods test3.ods
 odf:
 	ln -s ../odf
+
+test: clean odf
+	@echo 1,2,3,4 > test.csv
+	@echo 5,6,7,8 >> test.csv
+	@echo 9,10,11,12 >> test.csv
+	@python2 csv2ods -i test.csv -o test2.ods
+	@python3 csv2ods -i test.csv -o test3.ods
+	@echo created files test.csv, test2.ods and test3.ods
diff --git a/csv2ods/csv2ods b/csv2ods/csv2ods
@@ -16,147 +16,214 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #
 # Contributor(s):
+#
 # Søren Roug
+#
+# Oct 2014: Georges Khaznadar <georgesk@debian.org>
+#   - ported to Python3
+#   - imlemented the missing switch -c / --encoding, with an extra
+#     feature for POSIX platforms which can guess encoding.
 
 from odf.opendocument import OpenDocumentSpreadsheet
 from odf.style import Style, TextProperties, ParagraphProperties, TableColumnProperties
 from odf.text import P
 from odf.table import Table, TableColumn, TableRow, TableCell
 from optparse import OptionParser
-import sys,csv,re, os
+import sys,csv,re, os, codecs
+
+if sys.version_info.major==3: unicode=str
+
+if sys.version_info.major==2:
+    class UTF8Recoder:
+        """
+        Iterator that reads an encoded stream and reencodes the input to UTF-8
+        """
+        def __init__(self, f, encoding):
+            self.reader = codecs.getreader(encoding)(f)
+
+        def __iter__(self):
+            return self
+
+        def next(self):
+            return self.reader.next().encode("utf-8")
+
+    class UnicodeReader:
+        """
+        A CSV reader which will iterate over lines in the CSV file "f",
+        which is encoded in the given encoding.
+        """
+
+        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+            f = UTF8Recoder(f, encoding)
+            self.reader = csv.reader(f, dialect=dialect, **kwds)
+
+        def next(self):
+            row = self.reader.next()
+            return [unicode(s, "utf-8") for s in row]
+
+        def __iter__(self):
+            return self
+
 
 def csvToOds( pathFileCSV, pathFileODS, tableName='table',
-	      delimiter=',', quoting=csv.QUOTE_MINIMAL,
-	      quotechar = '"', escapechar = None,
-	      skipinitialspace = False, lineterminator = '\r\n'):
-	textdoc = OpenDocumentSpreadsheet()
-	# Create a style for the table content. One we can modify
-	# later in the word processor.
-	tablecontents = Style(name="Table Contents", family="paragraph")
-	tablecontents.addElement(ParagraphProperties(numberlines="false", linenumber="0"))
-	tablecontents.addElement(TextProperties(fontweight="bold"))
-	textdoc.styles.addElement(tablecontents)
-
-	# Start the table
-	table = Table( name=tableName )
-	output = os.popen('/usr/bin/file ' + pathFileCSV).read()
-	m=re.match(r'^.*: ([-a-zA-Z0-9]+) text$', output)
-	if m:
-		encoding=m.group(1)
-		if 'ISO-8859' in encoding:
-			encoding="latin-1"
-	else:
-		encoding="utf-8"
-
-	reader = csv.reader(open(pathFileCSV, encoding=encoding), 
-			    delimiter=delimiter,
-			    quoting=quoting,
-			    quotechar=quotechar,
-			    escapechar=escapechar,
-			    skipinitialspace=skipinitialspace,
-			    lineterminator=lineterminator)
-	fltExp = re.compile('^\s*[-+]?\d+(\.\d+)?\s*$')
-
-	for row in reader:
-		tr = TableRow()
-		table.addElement(tr)
-		for val in row:
-			if fltExp.match(val):
-				tc = TableCell(valuetype="float", value=val.strip())
-			else:
-				tc = TableCell(valuetype="string")
-			tr.addElement(tc)
-			p = P(stylename=tablecontents,text=val)	
-			tc.addElement(p)
-
-	textdoc.spreadsheet.addElement(table)
-	textdoc.save( pathFileODS )
-
+              delimiter=',', quoting=csv.QUOTE_MINIMAL,
+              quotechar = '"', escapechar = None,
+              skipinitialspace = False, lineterminator = '\r\n',
+              encoding="utf-8"):
+    textdoc = OpenDocumentSpreadsheet()
+    # Create a style for the table content. One we can modify
+    # later in the word processor.
+    tablecontents = Style(name="Table Contents", family="paragraph")
+    tablecontents.addElement(ParagraphProperties(numberlines="false", linenumber="0"))
+    tablecontents.addElement(TextProperties(fontweight="bold"))
+    textdoc.styles.addElement(tablecontents)
+
+    # Start the table
+    table = Table( name=tableName )
+
+    if sys.version_info.major==3:
+        reader = csv.reader(open(pathFileCSV, encoding=encoding),
+                            delimiter=delimiter,
+                            quoting=quoting,
+                            quotechar=quotechar,
+                            escapechar=escapechar,
+                            skipinitialspace=skipinitialspace,
+                            lineterminator=lineterminator)
+    else:
+        reader = UnicodeReader(open(pathFileCSV),
+                               encoding=encoding,
+                               delimiter=delimiter,
+                               quoting=quoting,
+                               quotechar=quotechar,
+                               escapechar=escapechar,
+                               skipinitialspace=skipinitialspace,
+                               lineterminator=lineterminator)
+    fltExp = re.compile('^\s*[-+]?\d+(\.\d+)?\s*$')
+
+    for row in reader:
+        tr = TableRow()
+        table.addElement(tr)
+        for val in row:
+            if fltExp.match(val):
+                tc = TableCell(valuetype="float", value=val.strip())
+            else:
+                tc = TableCell(valuetype="string")
+            tr.addElement(tc)
+            p = P(stylename=tablecontents,text=val)
+            tc.addElement(p)
+
+        textdoc.spreadsheet.addElement(table)
+        textdoc.save( pathFileODS )
+
 if __name__ == "__main__":
-	usage = "%prog -i file.csv -o file.ods -d"
-	parser = OptionParser(usage=usage, version="%prog 0.1")
-	parser.add_option('-i','--input', action='store',
-			dest='input', help='File input in csv')
-	parser.add_option('-o','--output', action='store',
-			dest='output', help='File output in ods')
-	parser.add_option('-d','--delimiter', action='store',
-			dest='delimiter', help='specifies a one-character string to use as the field separator.  It defaults to ",".')
-
-	parser.add_option('-c','--encoding', action='store',
-			dest='encoding', help='specifies the encoding the file csv. It defaults to utf-8')
-
-	parser.add_option('-t','--table', action='store',
-			dest='tableName', help='The table name in the output file')
-
-	parser.add_option('-s','--skipinitialspace',
-			dest='skipinitialspace', help='''specifies how to interpret whitespace which
-						immediately follows a delimiter.  It defaults to False, which 
-						means that whitespace immediately following a delimiter is part
-						of the following field.''')
-
-	parser.add_option('-l','--lineterminator', action='store',
-			dest='lineterminator', help='''specifies the character sequence which should
-						terminate rows.''')
-
-	parser.add_option('-q','--quoting', action='store',
-			dest='quoting', help='''It can take on any of the following module constants:
-						0 = QUOTE_MINIMAL means only when required, for example, when a field contains either the quotechar or the delimiter
-						1 = QUOTE_ALL means that quotes are always placed around fields.
-						2 = QUOTE_NONNUMERIC means that quotes are always placed around fields which do not parse as integers or floating point numbers.
-						3 = QUOTE_NONE means that quotes are never placed around fields.
-						It defaults is QUOTE_MINIMAL''')
-
-	parser.add_option('-e','--escapechar', action='store',
-			dest='escapechar', help='''specifies a one-character string used to escape the delimiter when quoting is set to QUOTE_NONE.''')
-
-	parser.add_option('-r','--quotechar', action='store',
-			dest='quotechar', help='''specifies a one-character string to use as the quoting character.  It defaults to ".''')
-
-	(options, args) = parser.parse_args()
-
-	if options.input:
-		pathFileCSV = options.input
-	else:
-		parser.print_help()
-		exit( 0 )
-
-	if options.output:
-		pathFileODS = options.output
-	else:
-		parser.print_help()
-		exit( 0 )
-
-	if options.delimiter:
-		delimiter = options.delimiter
-	else:
-		delimiter = ","
-
-	if options.skipinitialspace:
-		skipinitialspace = True
-	else:
-		skipinitialspace=False
-
-	if options.lineterminator:
-		lineterminator = options.lineterminator
-	else:
-		lineterminator ="\r\n"
-
-	if options.escapechar:
-		escapechar = options.escapechar
-	else:
-		escapechar=None
-
-	if options.tableName:
-		tableName = options.tableName
-	else:
-		tableName = "table"
-
-	if options.quotechar:
-		quotechar = options.quotechar
-	else:
-		quotechar = "\""
-
-	csvToOds( pathFileCSV=pathFileCSV, pathFileODS=pathFileODS,
-		  delimiter=delimiter, skipinitialspace=skipinitialspace,
-		  escapechar=escapechar, lineterminator=lineterminator,
-		  tableName=tableName, quotechar=quotechar)
+    usage = "%prog -i file.csv -o file.ods -d"
+    parser = OptionParser(usage=usage, version="%prog 0.1")
+    parser.add_option('-i','--input', action='store',
+                      dest='input', help='File input in csv')
+    parser.add_option('-o','--output', action='store',
+                      dest='output', help='File output in ods')
+    parser.add_option('-d','--delimiter', action='store',
+                      dest='delimiter', help='specifies a one-character string to use as the field separator.  It defaults to ",".')
+
+    parser.add_option('-c','--encoding', action='store',
+                      dest='encoding', help='specifies the encoding the file csv. It defaults to utf-8')
+
+    parser.add_option('-t','--table', action='store',
+                      dest='tableName', help='The table name in the output file')
+
+    parser.add_option('-s','--skipinitialspace',
+                      dest='skipinitialspace', help='''specifies how to interpret whitespace which
+                                                immediately follows a delimiter.  It defaults to False, which
+                                                means that whitespace immediately following a delimiter is part
+                                                of the following field.''')
+
+    parser.add_option('-l','--lineterminator', action='store',
+                      dest='lineterminator', help='''specifies the character sequence which should
+                                                terminate rows.''')
+
+    parser.add_option('-q','--quoting', action='store',
+                      dest='quoting', help='''It can take on any of the following module constants:
+                                                0 = QUOTE_MINIMAL means only when required, for example, when a field contains either the quotechar or the delimiter
+                                                1 = QUOTE_ALL means that quotes are always placed around fields.
+                                                2 = QUOTE_NONNUMERIC means that quotes are always placed around fields which do not parse as integers or floating point numbers.
+                                                3 = QUOTE_NONE means that quotes are never placed around fields.
+                                                It defaults is QUOTE_MINIMAL''')
+
+    parser.add_option('-e','--escapechar', action='store',
+                      dest='escapechar', help='''specifies a one-character string used to escape the delimiter when quoting is set to QUOTE_NONE.''')
+
+    parser.add_option('-r','--quotechar', action='store',
+                      dest='quotechar', help='''specifies a one-character string to use as the quoting character.  It defaults to ".''')
+
+    (options, args) = parser.parse_args()
+
+    if options.input:
+        pathFileCSV = options.input
+    else:
+        parser.print_help()
+        exit( 0 )
+
+    if options.output:
+        pathFileODS = options.output
+    else:
+        parser.print_help()
+        exit( 0 )
+
+    if options.delimiter:
+        delimiter = options.delimiter
+    else:
+        delimiter = ","
+
+    if options.skipinitialspace:
+        skipinitialspace = True
+    else:
+        skipinitialspace=False
+
+    if options.lineterminator:
+        lineterminator = options.lineterminator
+    else:
+        lineterminator ="\r\n"
+
+    if options.escapechar:
+        escapechar = options.escapechar
+    else:
+        escapechar=None
+
+    if options.tableName:
+        tableName = options.tableName
+    else:
+        tableName = "table"
+
+    if options.quotechar:
+        quotechar = options.quotechar
+    else:
+        quotechar = "\""
+
+    encoding = "utf-8" # default setting
+    ###########################################################
+    ## try to guess the encoding; this is implemented only with
+    ## POSIX platforms. Can it be improved?
+    output = os.popen('/usr/bin/file ' + pathFileCSV).read()
+    m=re.match(r'^.*: ([-a-zA-Z0-9]+) text$', output)
+    if m:
+        encoding=m.group(1)
+        if 'ISO-8859' in encoding:
+            encoding="latin-1"
+    else:
+        encoding="utf-8"
+    ############################################################
+    # when the -c or --coding switch is used, it takes precedence
+    if options.encoding:
+        encoding = options.encoding
+
+    csvToOds( pathFileCSV=unicode(pathFileCSV),
+              pathFileODS=unicode(pathFileODS),
+              delimiter=delimiter, skipinitialspace=skipinitialspace,
+              escapechar=escapechar,
+              lineterminator=unicode(lineterminator),
+              tableName=tableName, quotechar=quotechar,
+              encoding=encoding)
+
+# Local Variables: ***
+# mode: python     ***
+# End:             ***
diff --git a/mailodf/Makefile b/mailodf/Makefile
@@ -9,6 +9,13 @@ txt: mailodf.txt
 	xmlto txt $<
 
 clean:
-	rm -f *.1 *.txt odf
+	rm -f *.1 *~ *.txt odf
 odf:
 	ln -s ../odf
+
+test: clean odf
+	@echo -n "Please input your local email for the test > "; read to; \
+	  python2 mailodf -f python2@python.org -s "F.Y.I" ../tests/examples/ol.odp "$$to"; \
+	  python3 mailodf -f python3@python.org -s "F.Y.I" ../tests/examples/ol.odp "$$to"
+	@echo 'You should receive two e-mails, one from python2@python.org'
+	@echo 'and the second from python3@python.org, with subjects: "F.Y.I"'