Skip to content

Commit

Permalink
Update format
Browse files Browse the repository at this point in the history
  • Loading branch information
sinhrks committed Oct 30, 2016
1 parent 571812c commit e4d417e
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 49 deletions.
32 changes: 21 additions & 11 deletions dask/dataframe/core.py
Expand Up @@ -286,21 +286,25 @@ def _repr_data(self):

@property
def _repr_divisions(self):
name = "npartitions={0}".format(self.npartitions)
if self.known_divisions:
divisions = pd.Index(self.divisions, name='divisions')
divisions = pd.Index(self.divisions, name=name)
else:
# use the string 'None' so the labels are not converted to NaN
divisions = pd.Index(['None'] * (self.npartitions + 1),
name='divisions')
name=name)
return divisions

def __repr__(self):
with pd.option_context("display.max_rows", 5):
data = repr(self._repr_data)

return """{name}
Dask {klass} Structure:
{data}""".format(name=self._repr_header(),
klass=self.__class__.__name__,
data=repr(self._repr_data))
data=data)

@property
def index(self):
Expand Down Expand Up @@ -1579,13 +1583,14 @@ def __repr__(self):
dtype=self.dtype)
else:
footer = "dtype: {dtype}".format(dtype=self.dtype)

return """{name}
Dask {klass} Structure:
{data}
{footer}""".format(name=self._repr_header(),
klass=self.__class__.__name__,
data=self._repr_data.to_string(),
data=self.to_string(),
footer=footer)

@derived_from(pd.Series)
Expand Down Expand Up @@ -1752,8 +1757,9 @@ def to_frame(self, name=None):
meta=self._meta.to_frame(name))

@derived_from(pd.Series)
def to_string(self):
return self._repr_data.to_string()
def to_string(self, max_rows=5):
# pd.option_context does not affect to_string, so pass max_rows explicitly
return self._repr_data.to_string(max_rows=max_rows)

@classmethod
def _bind_operator_method(cls, name, op):
Expand Down Expand Up @@ -2247,8 +2253,9 @@ def to_bag(self, index=False):
return to_bag(self, index)

@derived_from(pd.DataFrame)
def to_string(self):
return self._repr_data.to_string()
def to_string(self, max_rows=5):
# pd.option_context does not affect to_string, so pass max_rows explicitly
return self._repr_data.to_string(max_rows=max_rows)

def _get_numeric_data(self, how='any', subset=None):
# calculate columns to avoid unnecessary calculation
Expand Down Expand Up @@ -2578,14 +2585,17 @@ def _repr_data(self):
{data}"""

@derived_from(pd.DataFrame)
def to_html(self):
def to_html(self, max_rows=5):
# pd.Series doesn't have html repr
data = self._repr_data.to_html(max_rows=max_rows)
return self._HTML_FMT.format(name=_escape_html_tag(self._repr_header()),
data=self._repr_data.to_html())
data=data)

def _repr_html_(self):
with pd.option_context("display.max_rows", 5):
data = self._repr_data._repr_html_()
return self._HTML_FMT.format(name=_escape_html_tag(self._repr_header()),
data=self._repr_data._repr_html_())
data=data)


# bind operators
Expand Down
177 changes: 139 additions & 38 deletions dask/dataframe/tests/test_format.py
Expand Up @@ -21,21 +21,21 @@ def test_dataframe_format():
ddf = dd.from_pandas(df, 3)
exp = ("dd.DataFrame<from_pa..., npartitions=3, divisions=(0, 3, 6, 7)>\n\n"
"Dask DataFrame Structure:\n"
" A B C\n"
"divisions \n"
"0 int64 object category\n"
"3 ... ... ...\n"
"6 ... ... ...\n"
"7 ... ... ...")
" A B C\n"
"npartitions=3 \n"
"0 int64 object category\n"
"3 ... ... ...\n"
"6 ... ... ...\n"
"7 ... ... ...")
assert repr(ddf) == exp
assert str(ddf) == exp

exp = (" A B C\n"
"divisions \n"
"0 int64 object category\n"
"3 ... ... ...\n"
"6 ... ... ...\n"
"7 ... ... ...")
exp = (" A B C\n"
"npartitions=3 \n"
"0 int64 object category\n"
"3 ... ... ...\n"
"6 ... ... ...\n"
"7 ... ... ...")
assert ddf.to_string() == exp

exp_table = """<table border="1" class="dataframe">
Expand All @@ -47,7 +47,7 @@ def test_dataframe_format():
<th>C</th>
</tr>
<tr>
<th>divisions</th>
<th>npartitions=3</th>
<th></th>
<th></th>
<th></th>
Expand Down Expand Up @@ -103,12 +103,12 @@ def test_dataframe_format_with_index():
ddf = dd.from_pandas(df, 3)
exp = ("dd.DataFrame<from_pa..., npartitions=3, divisions=('A', 'D', 'G', 'H')>\n\n"
"Dask DataFrame Structure:\n"
" A B C\n"
"divisions \n"
"A int64 object category\n"
"D ... ... ...\n"
"G ... ... ...\n"
"H ... ... ...")
" A B C\n"
"npartitions=3 \n"
"A int64 object category\n"
"D ... ... ...\n"
"G ... ... ...\n"
"H ... ... ...")
assert repr(ddf) == exp
assert str(ddf) == exp

Expand All @@ -121,7 +121,7 @@ def test_dataframe_format_with_index():
<th>C</th>
</tr>
<tr>
<th>divisions</th>
<th>npartitions=3</th>
<th></th>
<th></th>
<th></th>
Expand Down Expand Up @@ -179,21 +179,21 @@ def test_dataframe_format_unknown_divisions():

exp = ("dd.DataFrame<from_pa..., npartitions=3>\n\n"
"Dask DataFrame Structure:\n"
" A B C\n"
"divisions \n"
"None int64 object category\n"
"None ... ... ...\n"
"None ... ... ...\n"
"None ... ... ...")
" A B C\n"
"npartitions=3 \n"
"None int64 object category\n"
"None ... ... ...\n"
"None ... ... ...\n"
"None ... ... ...")
assert repr(ddf) == exp
assert str(ddf) == exp

exp = (" A B C\n"
"divisions \n"
"None int64 object category\n"
"None ... ... ...\n"
"None ... ... ...\n"
"None ... ... ...")
exp = (" A B C\n"
"npartitions=3 \n"
"None int64 object category\n"
"None ... ... ...\n"
"None ... ... ...\n"
"None ... ... ...")
assert ddf.to_string() == exp

exp_table = """<table border="1" class="dataframe">
Expand All @@ -205,7 +205,7 @@ def test_dataframe_format_unknown_divisions():
<th>C</th>
</tr>
<tr>
<th>divisions</th>
<th>npartitions=3</th>
<th></th>
<th></th>
<th></th>
Expand Down Expand Up @@ -253,14 +253,101 @@ def test_dataframe_format_unknown_divisions():
assert ddf._repr_html_() == exp


def test_dataframe_format_long():
df = pd.DataFrame({'A': [1, 2, 3, 4, 5, 6, 7, 8] * 10,
'B': list('ABCDEFGH') * 10,
'C': pd.Categorical(list('AAABBBCC') * 10)})
ddf = dd.from_pandas(df, 10)
exp = ("dd.DataFrame<from_pa..., npartitions=10, divisions=(0, 8, 16, ..., 72, 79)>\n\n"
"Dask DataFrame Structure:\n A B C\n"
"npartitions=10 \n0 int64 object category\n"
"8 ... ... ...\n... ... ... ...\n"
"72 ... ... ...\n79 ... ... ...\n\n"
"[11 rows x 3 columns]")
assert repr(ddf) == exp
assert str(ddf) == exp

exp = (" A B C\n"
"npartitions=10 \n"
"0 int64 object category\n"
"8 ... ... ...\n"
"... ... ... ...\n"
"72 ... ... ...\n"
"79 ... ... ...")
assert ddf.to_string() == exp

exp_table = """<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>A</th>
<th>B</th>
<th>C</th>
</tr>
<tr>
<th>npartitions=10</th>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>int64</td>
<td>object</td>
<td>category</td>
</tr>
<tr>
<th>8</th>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<th>...</th>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<th>72</th>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<th>79</th>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
</tbody>
</table>"""

exp = """dd.DataFrame&lt;from_pa..., npartitions=10, divisions=(0, 8, 16, ..., 72, 79)&gt;
<div><strong>Dask DataFrame Structure:</strong></div>
{exp_table}""".format(exp_table=exp_table)
assert ddf.to_html() == exp

# table is boxed with div
exp = """dd.DataFrame&lt;from_pa..., npartitions=10, divisions=(0, 8, 16, ..., 72, 79)&gt;
<div><strong>Dask DataFrame Structure:</strong></div>
<div>
{exp_table}
<p>11 rows × 3 columns</p>
</div>""".format(exp_table=exp_table)
assert ddf._repr_html_() == exp


def test_series_format():
s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8],
index=list('ABCDEFGH'))
ds = dd.from_pandas(s, 3)
exp = """dd.Series<from_pa..., npartitions=3, divisions=('A', 'D', 'G', 'H')>
Dask Series Structure:
divisions
npartitions=3
A int64
D ...
G ...
Expand All @@ -269,7 +356,7 @@ def test_series_format():
assert repr(ds) == exp
assert str(ds) == exp

exp = """divisions
exp = """npartitions=3
A int64
D ...
G ...
Expand All @@ -282,7 +369,7 @@ def test_series_format():
exp = """dd.Series<from_pa..., npartitions=3, divisions=('A', 'D', 'G', 'H')>
Dask Series Structure:
divisions
npartitions=3
A int64
D ...
G ...
Expand All @@ -292,14 +379,28 @@ def test_series_format():
assert str(ds) == exp


def test_series_format_long():
s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10] * 10,
index=list('ABCDEFGHIJ') * 10)
ds = dd.from_pandas(s, 10)
exp = ("dd.Series<from_pa..., npartitions=10, divisions=('A', 'B', 'C', ..., 'J', 'J')>\n\n"
"Dask Series Structure:\nnpartitions=10\nA int64\nB ...\n"
" ... \nJ ...\nJ ...\ndtype: int64")
assert repr(ds) == exp
assert str(ds) == exp

exp = "npartitions=10\nA int64\nB ...\n ... \nJ ...\nJ ..."
assert ds.to_string() == exp


def test_index_format():
s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8],
index=list('ABCDEFGH'))
ds = dd.from_pandas(s, 3)
exp = """dd.Index<from_pa..., npartitions=3, divisions=('A', 'D', 'G', 'H')>
Dask Index Structure:
divisions
npartitions=3
A object
D ...
G ...
Expand All @@ -314,7 +415,7 @@ def test_index_format():
exp = """dd.Index<from_pa..., npartitions=3, divisions=(1, 4, 7, 8)>
Dask Index Structure:
divisions
npartitions=3
1 category
4 ...
7 ...
Expand Down

0 comments on commit e4d417e

Please sign in to comment.