From 6767b4041d83042fb59cbc40d412125664c718ac Mon Sep 17 00:00:00 2001 From: Paul Bustios Date: Mon, 29 Aug 2016 00:06:06 -0300 Subject: [PATCH 1/2] Add support to display Pandas DataFrame index using z.show() and modifies test. --- python/src/main/resources/bootstrap.py | 12 +++++++----- .../python/PythonInterpreterPandasSqlTest.java | 7 +++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/python/src/main/resources/bootstrap.py b/python/src/main/resources/bootstrap.py index 235f7abfed8..6e57ad087cc 100644 --- a/python/src/main/resources/bootstrap.py +++ b/python/src/main/resources/bootstrap.py @@ -142,17 +142,19 @@ def show_dataframe(self, df, **kwargs): """ limit = len(df) > self.max_result header_buf = StringIO("") - header_buf.write(str(df.columns[0])) - for col in df.columns[1:]: + idx_name = df.index.name if df.index.name is not None else "" + header_buf.write(idx_name) + for col in df.columns: header_buf.write("\t") header_buf.write(str(col)) header_buf.write("\n") body_buf = StringIO("") rows = df.head(self.max_result).values if limit else df.values - for row in rows: - body_buf.write(str(row[0])) - for cell in row[1:]: + index = df.index.values + for idx, row in zip(index, rows): + body_buf.write("%html {}".format(str(idx))) + for cell in row: body_buf.write("\t") body_buf.write(str(cell)) body_buf.write("\n") diff --git a/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java b/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java index f9538562c22..bea7300f53e 100644 --- a/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java +++ b/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java @@ -159,9 +159,10 @@ public void showDataFrame() { ret = python.interpret("import pandas as pd", context); ret = python.interpret("import numpy as np", context); - // given a Pandas DataFrame with non-text data + // given a Pandas 
DataFrame with an index and non-text data + ret = python.interpret("index = pd.Index([10, 11, 12, 13], name='index_name')", context); ret = python.interpret("d1 = {1 : [np.nan, 1, 2, 3], 'two' : [3., 4., 5., 6.7]}", context); - ret = python.interpret("df1 = pd.DataFrame(d1)", context); + ret = python.interpret("df1 = pd.DataFrame(d1, index=index)", context); assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code()); // when @@ -170,6 +171,8 @@ public void showDataFrame() { // then assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code()); assertEquals(ret.message(), Type.TABLE, ret.type()); + assertTrue(ret.message().indexOf("index_name") == 0); + assertTrue(ret.message().indexOf("13") > 0); assertTrue(ret.message().indexOf("nan") > 0); assertTrue(ret.message().indexOf("6.7") > 0); } From 7842f711aefe083b74300017083075576f21e181 Mon Sep 17 00:00:00 2001 From: Paul Bustios Date: Sun, 9 Oct 2016 17:38:42 -0300 Subject: [PATCH 2/2] Add param to make the index to be shown optionally --- python/src/main/resources/bootstrap.py | 17 +++++++++++------ .../python/PythonInterpreterPandasSqlTest.java | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/python/src/main/resources/bootstrap.py b/python/src/main/resources/bootstrap.py index 6e57ad087cc..95a301c5b3e 100644 --- a/python/src/main/resources/bootstrap.py +++ b/python/src/main/resources/bootstrap.py @@ -137,14 +137,16 @@ def show(self, p, **kwargs): elif hasattr(p, '__call__'): p() #error reporting - def show_dataframe(self, df, **kwargs): + def show_dataframe(self, df, show_index=False, **kwargs): """Pretty prints DF using Table Display System """ limit = len(df) > self.max_result header_buf = StringIO("") - idx_name = df.index.name if df.index.name is not None else "" - header_buf.write(idx_name) - for col in df.columns: + if show_index: + idx_name = str(df.index.name) if df.index.name is not None else "" + header_buf.write(idx_name + "\t") + 
header_buf.write(str(df.columns[0])) + for col in df.columns[1:]: header_buf.write("\t") header_buf.write(str(col)) header_buf.write("\n") @@ -153,8 +155,11 @@ def show_dataframe(self, df, **kwargs): rows = df.head(self.max_result).values if limit else df.values index = df.index.values for idx, row in zip(index, rows): - body_buf.write("%html {}".format(str(idx))) - for cell in row: + if show_index: + body_buf.write("%html {}".format(idx)) + body_buf.write("\t") + body_buf.write(str(row[0])) + for cell in row[1:]: body_buf.write("\t") body_buf.write(str(cell)) body_buf.write("\n") diff --git a/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java b/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java index bea7300f53e..9154394c57c 100644 --- a/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java +++ b/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java @@ -166,7 +166,7 @@ public void showDataFrame() { assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code()); // when - ret = python.interpret("z.show(df1)", context); + ret = python.interpret("z.show(df1, show_index=True)", context); // then assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());