diff --git a/README.md b/README.md index e5473b4e..9d27ce97 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,7 @@ Data columns (total 27 columns): 26 timestamp 13059 non-null datetime64[ns] dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17) memory usage: 80.0 bytes +Elasticsearch storage usage: 5.043 MB # Filtering of rows using comparisons >>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head() diff --git a/docs/sphinx/examples/demo_notebook.ipynb b/docs/sphinx/examples/demo_notebook.ipynb index 7ffabf85..14731cd5 100644 --- a/docs/sphinx/examples/demo_notebook.ipynb +++ b/docs/sphinx/examples/demo_notebook.ipynb @@ -88,7 +88,7 @@ "eland.dataframe.DataFrame" ] }, - "execution_count": 1, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -3122,7 +3122,8 @@ " 25 dayOfWeek 13059 non-null int64 \n", " 26 timestamp 13059 non-null datetime64[ns]\n", "dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n", - "memory usage: 64.0 bytes\n" + "memory usage: 64.000 bytes\n", + "Elasticsearch storage usage: 5.043 MB\n" ] } ], @@ -4065,7 +4066,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.6" + "version": "3.8.5" }, "pycharm": { "stem_cell": { diff --git a/eland/dataframe.py b/eland/dataframe.py index f13beebc..bf75ed17 100644 --- a/eland/dataframe.py +++ b/eland/dataframe.py @@ -831,6 +831,7 @@ def info( 1 geoip.city_name 4094 non-null object dtypes: object(2) memory usage: ... + Elasticsearch storage usage: ... """ if buf is None: # pragma: no cover buf = sys.stdout @@ -940,9 +941,9 @@ def _sizeof_fmt(num, size_qualifier): # returns size in human readable format for x in ["bytes", "KB", "MB", "GB", "TB"]: if num < 1024.0: - return f"{num:3.1f}{size_qualifier} {x}" + return f"{num:3.3f}{size_qualifier} {x}" num /= 1024.0 - return f"{num:3.1f}{size_qualifier} PB" + return f"{num:3.3f}{size_qualifier} PB" if verbose: _verbose_repr() @@ -972,7 +973,13 @@ def _sizeof_fmt(num, size_qualifier): # TODO - this is different from pd.DataFrame as we shouldn't # really hold much in memory. For now just approximate with getsizeof + ignore deep mem_usage = sys.getsizeof(self) - lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n") + lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}") + storage_usage = self._query_compiler._client.indices.stats( + index=self._query_compiler._index_pattern, metric=["store"] + )["_all"]["total"]["store"]["size_in_bytes"] + lines.append( + f"Elasticsearch storage usage: {_sizeof_fmt(storage_usage,size_qualifier)}\n" + ) fmt.buffer_put_lines(buf, lines) diff --git a/eland/tests/tests_notebook/test_demo_notebook.ipynb b/eland/tests/tests_notebook/test_demo_notebook.ipynb index 1f662304..0b907c6a 100644 --- a/eland/tests/tests_notebook/test_demo_notebook.ipynb +++ b/eland/tests/tests_notebook/test_demo_notebook.ipynb @@ -2870,7 +2870,8 @@ " 25 dayOfWeek 13059 non-null int64 \n", " 26 timestamp 13059 non-null datetime64[ns]\n", "dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n", - "memory usage: 64.0 bytes\n" + "memory usage: 64.000 bytes\n", + "Elasticsearch storage usage: 5.043 MB\n" ] } ],