From d80b0647df71cada2b5b2fe22c8570ab810bbd6a Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 9 Jul 2015 15:30:04 +0200 Subject: [PATCH] added multiline support for tabular output format --- CHANGES.txt | 2 + src/crate/crash/tabulate.py | 334 ++++++++++++++++++++++++++++---- src/crate/crash/test_command.py | 18 ++ 3 files changed, 311 insertions(+), 43 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 523c7262..ed12cf14 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -5,6 +5,8 @@ Changes for crash Unreleased ========== + - added multiline support for tabular output format + 2015/06/16 0.13.0 ================= diff --git a/src/crate/crash/tabulate.py b/src/crate/crash/tabulate.py index 978b4e1b..d9aa361e 100644 --- a/src/crate/crash/tabulate.py +++ b/src/crate/crash/tabulate.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -# -# Copyright (c) 2011-2013 Sergey Astanin +# Copyright (c) 2011-2014 Sergey Astanin # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -35,21 +34,31 @@ from functools import partial _none_type = type(None) _int_type = int + _long_type = long _float_type = float _text_type = unicode _binary_type = str + + def _is_file(f): + return isinstance(f, file) + else: from itertools import zip_longest as izip_longest from functools import reduce, partial _none_type = type(None) _int_type = int + _long_type = int _float_type = float _text_type = str _binary_type = bytes + import io + def _is_file(f): + return isinstance(f, io.IOBase) + __all__ = ["tabulate", "tabulate_formats", "simple_separated_format"] -__version__ = "0.7.3" +__version__ = "0.7.5" MIN_PADDING = 0 @@ -132,10 +141,34 @@ def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns): return (separator + colsep.join(values_with_attrs)).rstrip() -def _latex_line_begin_tabular(colwidths, colaligns): +def _html_row_with_attrs(celltag, cell_values, colwidths, colaligns): + 
alignment = { "left": '', + "right": ' style="text-align: right;"', + "center": ' style="text-align: center;"', + "decimal": ' style="text-align: right;"' } + values_with_attrs = ["<{0}{1}>{2}".format(celltag, alignment.get(a, ''), c) + for c, a in zip(cell_values, colaligns)] + return "" + "".join(values_with_attrs).rstrip() + "" + + +def _latex_line_begin_tabular(colwidths, colaligns, booktabs=False): alignment = { "left": "l", "right": "r", "center": "c", "decimal": "r" } tabular_columns_fmt = "".join([alignment.get(a, "l") for a in colaligns]) - return "\\begin{tabular}{" + tabular_columns_fmt + "}\n\hline" + return "\n".join(["\\begin{tabular}{" + tabular_columns_fmt + "}", + "\\toprule" if booktabs else "\hline"]) + +LATEX_ESCAPE_RULES = {r"&": r"\&", r"%": r"\%", r"$": r"\$", r"#": r"\#", + r"_": r"\_", r"^": r"\^{}", r"{": r"\{", r"}": r"\}", + r"~": r"\textasciitilde{}", "\\": r"\textbackslash{}", + r"<": r"\ensuremath{<}", r">": r"\ensuremath{>}"} + + +def _latex_row(cell_values, colwidths, colaligns): + def escape_char(c): + return LATEX_ESCAPE_RULES.get(c, c) + escaped_values = ["".join(map(escape_char, cell)) for cell in cell_values] + rowfmt = DataRow("", "&", "\\\\") + return _build_simple_row(escaped_values, rowfmt) _table_formats = {"simple": @@ -161,6 +194,14 @@ def _latex_line_begin_tabular(colwidths, colaligns): headerrow=DataRow("|", "|", "|"), datarow=DataRow("|", "|", "|"), padding=1, with_header_hide=None), + "fancy_grid": + TableFormat(lineabove=Line("╒", "═", "╤", "╕"), + linebelowheader=Line("╞", "═", "╪", "╡"), + linebetweenrows=Line("├", "─", "┼", "┤"), + linebelow=Line("╘", "═", "╧", "╛"), + headerrow=DataRow("│", "│", "│"), + datarow=DataRow("│", "│", "│"), + padding=1, with_header_hide=None), "pipe": TableFormat(lineabove=_pipe_line_with_colons, linebelowheader=_pipe_line_with_colons, @@ -178,6 +219,14 @@ def _latex_line_begin_tabular(colwidths, colaligns): headerrow=DataRow("|", "|", "|"), datarow=DataRow("|", "|", "|"), padding=1, 
with_header_hide=None), + "psql": + TableFormat(lineabove=Line("+", "-", "+", "+"), + linebelowheader=Line("|", "-", "+", "|"), + linebetweenrows=None, + linebelow=Line("+", "-", "+", "+"), + headerrow=DataRow("|", "|", "|"), + datarow=DataRow("|", "|", "|"), + padding=1, with_header_hide=None), "rst": TableFormat(lineabove=Line("", "=", " ", ""), linebelowheader=Line("", "=", " ", ""), @@ -195,13 +244,29 @@ def _latex_line_begin_tabular(colwidths, colaligns): headerrow=partial(_mediawiki_row_with_attrs, "!"), datarow=partial(_mediawiki_row_with_attrs, "|"), padding=0, with_header_hide=None), + "html": + TableFormat(lineabove=Line("", "", "", ""), + linebelowheader=None, + linebetweenrows=None, + linebelow=Line("
", "", "", ""), + headerrow=partial(_html_row_with_attrs, "th"), + datarow=partial(_html_row_with_attrs, "td"), + padding=0, with_header_hide=None), "latex": TableFormat(lineabove=_latex_line_begin_tabular, linebelowheader=Line("\\hline", "", "", ""), linebetweenrows=None, linebelow=Line("\\hline\n\\end{tabular}", "", "", ""), - headerrow=DataRow("", "&", "\\\\"), - datarow=DataRow("", "&", "\\\\"), + headerrow=_latex_row, + datarow=_latex_row, + padding=1, with_header_hide=None), + "latex_booktabs": + TableFormat(lineabove=partial(_latex_line_begin_tabular, booktabs=True), + linebelowheader=Line("\\midrule", "", "", ""), + linebetweenrows=None, + linebelow=Line("\\bottomrule\n\\end{tabular}", "", "", ""), + headerrow=_latex_row, + datarow=_latex_row, padding=1, with_header_hide=None), "tsv": TableFormat(lineabove=None, linebelowheader=None, @@ -214,8 +279,8 @@ def _latex_line_begin_tabular(colwidths, colaligns): tabulate_formats = list(sorted(_table_formats.keys())) -_invisible_codes = re.compile("\x1b\[\d*m") # ANSI color codes -_invisible_codes_bytes = re.compile(b"\x1b\[\d*m") # ANSI color codes +_invisible_codes = re.compile(r"\x1b\[\d*m|\x1b\[\d*\;\d*\;\d*m") # ANSI color codes +_invisible_codes_bytes = re.compile(b"\x1b\[\d*m|\x1b\[\d*\;\d*\;\d*m") # ANSI color codes def simple_separated_format(separator): @@ -252,16 +317,17 @@ def _isnumber(string): return _isconvertible(float, string) -def _isint(string): +def _isint(string, inttype=int): """ >>> _isint("123") True >>> _isint("123.45") False """ - return type(string) is int or \ - (isinstance(string, _binary_type) or isinstance(string, _text_type)) and \ - _isconvertible(int, string) + return type(string) is inttype or\ + (isinstance(string, _binary_type) or isinstance(string, _text_type))\ + and\ + _isconvertible(inttype, string) def _type(string, has_invisible=True): @@ -290,6 +356,8 @@ def _type(string, has_invisible=True): return _text_type elif _isint(string): return int + elif _isint(string, 
_long_type): + return _long_type elif _isnumber(string): return float elif isinstance(string, _binary_type): @@ -332,9 +400,12 @@ def _padleft(width, s, has_invisible=True): True """ - iwidth = width + len(s) - len(_strip_invisible(s)) if has_invisible else width - fmt = "{0:>%ds}" % iwidth - return fmt.format(s) + def impl(val): + iwidth = width + len(val) - len(_strip_invisible(val)) if has_invisible else width + fmt = "{0:>%ds}" % iwidth + return fmt.format(val) + num_lines = s.splitlines() + return len(num_lines) > 1 and u'\n'.join(map(impl, num_lines)) or impl(s) def _padright(width, s, has_invisible=True): @@ -344,9 +415,12 @@ def _padright(width, s, has_invisible=True): True """ - iwidth = width + len(s) - len(_strip_invisible(s)) if has_invisible else width - fmt = "{0:<%ds}" % iwidth - return fmt.format(s) + def impl(val): + iwidth = width + len(val) - len(_strip_invisible(val)) if has_invisible else width + fmt = "{0:<%ds}" % iwidth + return fmt.format(val) + num_lines = s.splitlines() + return len(num_lines) > 1 and u'\n'.join(map(impl, num_lines)) or impl(s) def _padboth(width, s, has_invisible=True): @@ -356,9 +430,12 @@ def _padboth(width, s, has_invisible=True): True """ - iwidth = width + len(s) - len(_strip_invisible(s)) if has_invisible else width - fmt = "{0:^%ds}" % iwidth - return fmt.format(s) + def impl(val): + iwidth = width + len(val) - len(_strip_invisible(val)) if has_invisible else width + fmt = "{0:^%ds}" % iwidth + return fmt.format(val) + num_lines = s.splitlines() + return len(num_lines) > 1 and u'\n'.join(map(impl, num_lines)) or impl(s) def _strip_invisible(s): @@ -368,6 +445,17 @@ def _strip_invisible(s): else: # a bytestring return re.sub(_invisible_codes_bytes, "", s) +def _max_line_width(s): + """ + Visible width of potentially multiline content.
+ + >>> _max_line_width('this\\nis\\na\\nmultiline\\ntext') + 9 + + """ + if not s: + return 0 + return max(map(len, s.splitlines())) def _visible_width(s): """Visible width of a printed string. ANSI color codes are removed. @@ -377,9 +465,9 @@ def _visible_width(s): """ if isinstance(s, _text_type) or isinstance(s, _binary_type): - return len(_strip_invisible(s)) + return _max_line_width(_strip_invisible(s)) else: - return len(_text_type(s)) + return _max_line_width(_text_type(s)) def _align_column(strings, alignment, minwidth=0, has_invisible=True): @@ -399,7 +487,10 @@ def _align_column(strings, alignment, minwidth=0, has_invisible=True): strings = [s.strip() for s in strings] padfn = _padboth elif alignment == "decimal": - decimals = [_afterpoint(s) for s in strings] + if has_invisible: + decimals = [_afterpoint(_strip_invisible(s)) for s in strings] + else: + decimals = [_afterpoint(s) for s in strings] maxdecimals = max(decimals) strings = [s + (maxdecimals - decs) * " " for s, decs in zip(strings, decimals)] @@ -413,7 +504,7 @@ def _align_column(strings, alignment, minwidth=0, has_invisible=True): if has_invisible: width_fn = _visible_width else: - width_fn = len + width_fn = _max_line_width maxwidth = max(max(map(width_fn, strings)), minwidth) padded_strings = [padfn(maxwidth, s, has_invisible) for s in strings] @@ -451,8 +542,8 @@ def _column_type(strings, has_invisible=True): return reduce(_more_generic, types, int) -def _format(val, valtype, floatfmt, missingval=""): - """Format a value according to its type. +def _format(val, valtype, floatfmt, missingval="", has_invisible=True): + """Format a value according to its type.
Unicode is supported: @@ -466,14 +557,23 @@ def _format(val, valtype, floatfmt, missingval=""): if val is None: return missingval - if valtype in [int, _text_type]: - return _text_type(val) + if valtype in [int, _long_type, _text_type]: + return "{0}".format(val) elif valtype is _binary_type: - return _text_type(val, "ascii") + try: + return _text_type(val, "ascii") + except TypeError: + return _text_type(val) elif valtype is float: - return format(float(val), floatfmt) + is_a_colored_number = has_invisible and isinstance(val, (_text_type, _binary_type)) + if is_a_colored_number: + raw_val = _strip_invisible(val) + formatted_val = format(float(raw_val), floatfmt) + return val.replace(raw_val, formatted_val) + else: + return format(float(val), floatfmt) else: - return _text_type(val) + return "{0}".format(val) def _align_header(header, alignment, width): @@ -563,9 +663,18 @@ def _normalize_tabular_data(tabular_data, headers): uniq_keys.add(k) if headers == 'keys': headers = keys - elif headers == "firstrow" and len(rows) > 0: - headers = [firstdict.get(k, k) for k in keys] + elif isinstance(headers, dict): + # a dict of headers for a list of dicts + headers = [headers.get(k, k) for k in keys] headers = list(map(_text_type, headers)) + elif headers == "firstrow": + if len(rows) > 0: + headers = [firstdict.get(k, k) for k in keys] + headers = list(map(_text_type, headers)) + else: + headers = [] + elif headers: + raise ValueError('headers for a list of dicts is not a dict or a keyword') rows = [[row.get(k) for k in keys] for row in rows] elif headers == "keys" and len(rows) > 0: # keys are column indices @@ -589,7 +698,7 @@ def _normalize_tabular_data(tabular_data, headers): return rows, headers -def tabulate(tabular_data, headers=[], tablefmt="simple", +def tabulate(tabular_data, headers=(), tablefmt="simple", floatfmt="g", numalign="decimal", stralign="left", missingval=""): """Format a fixed width table for pretty printing. 
@@ -661,7 +770,7 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", Various plain-text table formats (`tablefmt`) are supported: 'plain', 'simple', 'grid', 'pipe', 'orgtbl', 'rst', 'mediawiki', - and 'latex'. Variable `tabulate_formats` contains the list of + 'latex', and 'latex_booktabs'. Variable `tabulate_formats` contains the list of currently supported formats. "plain" format doesn't use any pseudographics to draw tables, @@ -705,6 +814,19 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", | eggs | 451 | +-----------+-----------+ + >>> print(tabulate([["this\\nis\\na multiline\\ntext", "41.9999", "foo\\nbar"], ["NULL", "451.0", ""]], + ... ["text", "numbers", "other"], "grid")) + +-------------+----------+-------+ + | text | numbers | other | + +=============+==========+=======+ + | this | 41.9999 | foo | + | is | | bar | + | a multiline | | | + | text | | | + +-------------+----------+-------+ + | NULL | 451 | | + +-------------+----------+-------+ + >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="grid")) +------+----------+ | spam | 41.9999 | @@ -712,6 +834,18 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", | eggs | 451 | +------+----------+ + "fancy_grid" draws a grid using box-drawing characters: + + >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], + ... ["strings", "numbers"], "fancy_grid")) + ╒═══════════╤═══════════╕ + │ strings │ numbers │ + ╞═══════════╪═══════════╡ + │ spam │ 41.9999 │ + ├───────────┼───────────┤ + │ eggs │ 451 │ + ╘═══════════╧═══════════╛ + "pipe" is like tables in PHP Markdown Extra extension or Pandoc pipe_tables: @@ -777,6 +911,16 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", | eggs || align="right"| 451 |} + "html" produces HTML markup: + + >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]], + ... headers="firstrow", tablefmt="html")) + + + + +
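+    <table>
+    <tr><th>strings  </th><th style="text-align: right;">  numbers</th></tr>
+    <tr><td>spam     </td><td style="text-align: right;">  41.9999</td></tr>
+    <tr><td>eggs     </td><td style="text-align: right;"> 451     </td></tr>
+    </table>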
+ "latex" produces a tabular environment of LaTeX document markup: >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex")) @@ -787,6 +931,16 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", \\hline \\end{tabular} + "latex_booktabs" produces a tabular environment of LaTeX document markup + using the booktabs.sty package: + + >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_booktabs")) + \\begin{tabular}{lr} + \\toprule + spam & 41.9999 \\\\ + eggs & 451 \\\\ + \\bottomrule + \end{tabular} """ if tabular_data is None: tabular_data = [] @@ -800,12 +954,12 @@ def tabulate(tabular_data, headers=[], tablefmt="simple", if has_invisible: width_fn = _visible_width else: - width_fn = len + width_fn = _max_line_width # format rows and columns, convert numeric values to strings cols = list(zip(*list_of_lists)) coltypes = list(map(_column_type, cols)) - cols = [[_format(v, ct, floatfmt, missingval) for v in c] + cols = [[_format(v, ct, floatfmt, missingval, has_invisible) for v in c] for c,ct in zip(cols, coltypes)] # align columns @@ -862,8 +1016,8 @@ def _build_line(colwidths, colaligns, linefmt): def _pad_row(cells, padding): if cells: - pad = " "*padding - padded_cells = [pad + cell + pad for cell in cells] + pad = " " * padding + padded_cells = ['\n'.join([pad + sub_cell + pad for sub_cell in cell.splitlines()]) for cell in cells] return padded_cells else: return cells @@ -880,6 +1034,15 @@ def _format_table(fmt, headers, rows, colwidths, colaligns): padded_headers = _pad_row(headers, pad) padded_rows = [_pad_row(row, pad) for row in rows] + def get_sub_row(row, idx): + new_row = [] + col_idx = 0 + for col in row: + subrows = col.splitlines() + new_row.append(idx < len(subrows) and subrows[idx] or " " * padded_widths[col_idx]) + col_idx += 1 + return new_row + if fmt.lineabove and "lineabove" not in hidden: lines.append(_build_line(padded_widths, colaligns, fmt.lineabove)) @@ -891,15 +1054,100 @@ def 
_format_table(fmt, headers, rows, colwidths, colaligns): if padded_rows and fmt.linebetweenrows and "linebetweenrows" not in hidden: # initial rows with a line below for row in padded_rows[:-1]: - lines.append(_build_row(row, padded_widths, colaligns, fmt.datarow)) + max_height = max(map(lambda x: len(x.splitlines()), row)) + for line in range(max_height): + lines.append(_build_row(get_sub_row(row, line), padded_widths, colaligns, fmt.datarow)) lines.append(_build_line(padded_widths, colaligns, fmt.linebetweenrows)) # the last row without a line below lines.append(_build_row(padded_rows[-1], padded_widths, colaligns, fmt.datarow)) else: for row in padded_rows: - lines.append(_build_row(row, padded_widths, colaligns, fmt.datarow)) + max_height = max(map(lambda x: len(x.splitlines()), row)) + for line in range(max_height): + lines.append(_build_row(get_sub_row(row, line), padded_widths, colaligns, fmt.datarow)) if fmt.linebelow and "linebelow" not in hidden: lines.append(_build_line(padded_widths, colaligns, fmt.linebelow)) return "\n".join(lines) + + +def _main(): + """\ + Usage: tabulate [options] [FILE ...] + + Pretty-print tabular data. + See also https://bitbucket.org/astanin/python-tabulate + + FILE a filename of the file with tabular data; + if "-" or missing, read data from stdin. 
+ + Options: + + -h, --help show this message + -1, --header use the first row of data as a table header + -o FILE, --output FILE print table to FILE (default: stdout) + -s REGEXP, --sep REGEXP use a custom column separator (default: whitespace) + -F FPFMT, --float FPFMT floating point number format (default: g) + -f FMT, --format FMT set output table format; supported formats: + plain, simple, grid, fancy_grid, pipe, orgtbl, + rst, mediawiki, html, latex, latex_booktabs, tsv + (default: simple) + """ + import getopt + import sys + import textwrap + usage = textwrap.dedent(_main.__doc__) + try: + opts, args = getopt.getopt(sys.argv[1:], + "h1o:s:F:f:", + ["help", "header", "output", "sep=", "float=", "format="]) + except getopt.GetoptError as e: + print(e) + print(usage) + sys.exit(2) + headers = [] + floatfmt = "g" + tablefmt = "simple" + sep = r"\s+" + outfile = "-" + for opt, value in opts: + if opt in ["-1", "--header"]: + headers = "firstrow" + elif opt in ["-o", "--output"]: + outfile = value + elif opt in ["-F", "--float"]: + floatfmt = value + elif opt in ["-f", "--format"]: + if value not in tabulate_formats: + print("%s is not a supported table format" % value) + print(usage) + sys.exit(3) + tablefmt = value + elif opt in ["-s", "--sep"]: + sep = value + elif opt in ["-h", "--help"]: + print(usage) + sys.exit(0) + files = [sys.stdin] if not args else args + with (sys.stdout if outfile == "-" else open(outfile, "w")) as out: + for f in files: + if f == "-": + f = sys.stdin + if _is_file(f): + _pprint_file(f, headers=headers, tablefmt=tablefmt, + sep=sep, floatfmt=floatfmt, file=out) + else: + with open(f) as fobj: + _pprint_file(fobj, headers=headers, tablefmt=tablefmt, + sep=sep, floatfmt=floatfmt, file=out) + + +def _pprint_file(fobject, headers, tablefmt, sep, floatfmt, file): + rows = fobject.readlines() + table = [re.split(sep, r.rstrip()) for r in rows] + print(tabulate(table, headers, tablefmt, floatfmt=floatfmt), file=file) + + +if __name__ == 
"__main__": + _main() diff --git a/src/crate/crash/test_command.py b/src/crate/crash/test_command.py index d642e841..f209913f 100644 --- a/src/crate/crash/test_command.py +++ b/src/crate/crash/test_command.py @@ -239,6 +239,24 @@ def test_tabulate_boolean_int_column(self): cmd.pprint(rows, cols=['x']) self.assertEqual(expected, output.getvalue()) + def test_multiline_row(self): + """ + Create a column that holds rows with multiline text. + """ + rows = [[u'create table foo (\n id integer,\n name string\n)', 'foo\nbar', 1]] + expected = "\n".join(['+-----------------------+-----+---+', + '| show create table foo | a | b |', + '+-----------------------+-----+---+', + '| create table foo ( | foo | 1 |', + '| id integer, | bar | |', + '| name string | | |', + '| ) | | |', + '+-----------------------+-----+---+\n']) + cmd = CrateCmd() + with patch('sys.stdout', new_callable=StringIO) as output: + cmd.pprint(rows, cols=['show create table foo', 'a', 'b']) + self.assertEqual(expected, output.getvalue()) + def test_error_exit_code(self): """Test returns an error exit code""" stmt = u"select * from invalid sql statement"