Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Elasticsearch storage usage to df.info() #321

Merged
merged 1 commit into from Nov 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -111,6 +111,7 @@ Data columns (total 27 columns):
26 timestamp 13059 non-null datetime64[ns]
dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)
memory usage: 80.0 bytes
Elasticsearch storage usage: 5.043 MB

# Filtering of rows using comparisons
>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head()
Expand Down
7 changes: 4 additions & 3 deletions docs/sphinx/examples/demo_notebook.ipynb
Expand Up @@ -88,7 +88,7 @@
"eland.dataframe.DataFrame"
]
},
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -3122,7 +3122,8 @@
" 25 dayOfWeek 13059 non-null int64 \n",
" 26 timestamp 13059 non-null datetime64[ns]\n",
"dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n",
"memory usage: 64.0 bytes\n"
"memory usage: 64.000 bytes\n",
"Elasticsearch storage usage: 5.043 MB\n"
]
}
],
Expand Down Expand Up @@ -4065,7 +4066,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
"version": "3.8.5"
},
"pycharm": {
"stem_cell": {
Expand Down
13 changes: 10 additions & 3 deletions eland/dataframe.py
Expand Up @@ -831,6 +831,7 @@ def info(
1 geoip.city_name 4094 non-null object
dtypes: object(2)
memory usage: ...
Elasticsearch storage usage: ...
"""
if buf is None: # pragma: no cover
buf = sys.stdout
Expand Down Expand Up @@ -940,9 +941,9 @@ def _sizeof_fmt(num, size_qualifier):
# returns size in human readable format
for x in ["bytes", "KB", "MB", "GB", "TB"]:
if num < 1024.0:
return f"{num:3.1f}{size_qualifier} {x}"
return f"{num:3.3f}{size_qualifier} {x}"
num /= 1024.0
return f"{num:3.1f}{size_qualifier} PB"
return f"{num:3.3f}{size_qualifier} PB"

if verbose:
_verbose_repr()
Expand Down Expand Up @@ -972,7 +973,13 @@ def _sizeof_fmt(num, size_qualifier):
# TODO - this is different from pd.DataFrame as we shouldn't
# really hold much in memory. For now just approximate with getsizeof + ignore deep
mem_usage = sys.getsizeof(self)
lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")
lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}")
storage_usage = self._query_compiler._client.indices.stats(
index=self._query_compiler._index_pattern, metric=["store"]
)["_all"]["total"]["store"]["size_in_bytes"]
lines.append(
f"Elasticsearch storage usage: {_sizeof_fmt(storage_usage,size_qualifier)}\n"
)

fmt.buffer_put_lines(buf, lines)

Expand Down
3 changes: 2 additions & 1 deletion eland/tests/tests_notebook/test_demo_notebook.ipynb
Expand Up @@ -2870,7 +2870,8 @@
" 25 dayOfWeek 13059 non-null int64 \n",
" 26 timestamp 13059 non-null datetime64[ns]\n",
"dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n",
"memory usage: 64.0 bytes\n"
"memory usage: 64.000 bytes\n",
"Elasticsearch storage usage: 5.043 MB\n"
]
}
],
Expand Down