From 02037c1c294d4d03c093c38d4407220c3da8db2f Mon Sep 17 00:00:00 2001
From: "P. Sai Vinay" <pvinay1998@gmail.com>
Date: Thu, 12 Nov 2020 14:33:39 +0530
Subject: [PATCH 1/6] Add mode to df, series

---
 docs/sphinx/reference/dataframe.rst    |   1 +
 docs/sphinx/reference/series.rst       |   1 +
 eland/dataframe.py                     |  62 +++++++++++++
 eland/field_mappings.py                |   3 +
 eland/groupby.py                       |   3 +
 eland/operations.py                    | 120 ++++++++++++++++++++++---
 eland/query.py                         |  19 +++-
 eland/query_compiler.py                |  18 +++-
 eland/series.py                        |  42 +++++++++
 tests/dataframe/test_groupby_pytest.py |   6 ++
 tests/dataframe/test_metrics_pytest.py |  15 ++++
 tests/series/test_metrics_pytest.py    |  23 +++++
 12 files changed, 299 insertions(+), 14 deletions(-)

diff --git a/docs/sphinx/reference/dataframe.rst b/docs/sphinx/reference/dataframe.rst
index 72869aa8..391c66ac 100644
--- a/docs/sphinx/reference/dataframe.rst
+++ b/docs/sphinx/reference/dataframe.rst
@@ -89,6 +89,7 @@ Computations / Descriptive Stats
    DataFrame.var
    DataFrame.sum
    DataFrame.nunique
+   DataFrame.mode
 
 Reindexing / Selection / Label Manipulation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/sphinx/reference/series.rst b/docs/sphinx/reference/series.rst
index 3e34a2f3..b4355fb4 100644
--- a/docs/sphinx/reference/series.rst
+++ b/docs/sphinx/reference/series.rst
@@ -79,6 +79,7 @@ Computations / Descriptive Stats
    Series.var
    Series.nunique
    Series.value_counts
+   Series.mode
 
 Reindexing / Selection / Label Manipulation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/eland/dataframe.py b/eland/dataframe.py
index 7578a4f7..fe24ab26 100644
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@@ -1628,6 +1628,68 @@ def groupby(
             by=by, query_compiler=self._query_compiler.copy(), dropna=dropna
         )
 
+    def mode(
+        self,
+        numeric_only: bool = False,
+        dropna: bool = True,
+        es_size: int = 10,
+    ) -> pd.DataFrame:
+        """
+            Calculate mode of a DataFrame
+
+        Parameters
+        ----------
+        numeric_only: {True, False} Default is False
+            Which datatype to be returned
+            - True: Returns all numeric or timestamp columns
+            - False: Returns all columns
+        dropna: {True, False} Default is True
+            - True: Don’t consider counts of NaN/NaT.
+            - False: Consider counts of NaN/NaT.
+        es_size: default 10
+            number of rows to be returned if mode has multiple values
+
+        See Also
+        --------
+        :pandas_api_docs:`pandas.DataFrame.mode`
+
+        Examples
+        --------
+        >>> ed_ecommerce = ed.DataFrame('localhost', 'ecommerce')
+        >>> ed_df = ed_ecommerce.filter(["total_quantity", "geoip.city_name", "customer_birth_date", "day_of_week", "taxful_total_price"])
+        >>> ed_df.mode(numeric_only=False)
+           total_quantity geoip.city_name customer_birth_date day_of_week  taxful_total_price
+        0               2        New York                 NaT    Thursday               53.98
+
+        >>> ed_df.mode(numeric_only=True)
+           total_quantity  taxful_total_price
+        0               2               53.98
+
+        >>> ed_df = ed_ecommerce.filter(["products.tax_amount","order_date"])
+        >>> ed_df.mode()
+           products.tax_amount          order_date
+        0                  0.0 2016-12-02 20:36:58
+        1                  NaN 2016-12-04 23:44:10
+        2                  NaN 2016-12-08 06:21:36
+        3                  NaN 2016-12-08 09:38:53
+        4                  NaN 2016-12-12 11:38:24
+        5                  NaN 2016-12-12 19:46:34
+        6                  NaN 2016-12-14 18:00:00
+        7                  NaN 2016-12-15 11:38:24
+        8                  NaN 2016-12-22 19:39:22
+        9                  NaN 2016-12-24 06:21:36
+
+        >>> ed_df.mode(es_size = 3)
+           products.tax_amount          order_date
+        0                  0.0 2016-12-02 20:36:58
+        1                  NaN 2016-12-04 23:44:10
+        2                  NaN 2016-12-08 06:21:36
+        """
+        # TODO dropna=False
+        return self._query_compiler.mode(
+            numeric_only=numeric_only, dropna=True, is_dataframe=True, es_size=es_size
+        )
+
     def query(self, expr) -> "DataFrame":
         """
         Query the columns of a DataFrame with a boolean expression.
diff --git a/eland/field_mappings.py b/eland/field_mappings.py
index e2c3b577..9a123248 100644
--- a/eland/field_mappings.py
+++ b/eland/field_mappings.py
@@ -109,6 +109,9 @@ def is_es_agg_compatible(self, es_agg) -> bool:
         # Timestamps also work for 'min', 'max' and 'avg'
         if es_agg in {"min", "max", "avg", "percentiles"} and self.is_timestamp:
             return True
+        # All datatypes work for mode
+        if es_agg == "mode":
+            return True
         return False
 
     @property
diff --git a/eland/groupby.py b/eland/groupby.py
index d57ad939..2183039b 100644
--- a/eland/groupby.py
+++ b/eland/groupby.py
@@ -617,3 +617,6 @@ def count(self) -> "pd.DataFrame":
             numeric_only=False,
             is_dataframe_agg=False,
         )
+
+    def mode(self) -> None:
+        raise NotImplementedError("Currently mode is not supported for groupby")
diff --git a/eland/operations.py b/eland/operations.py
index 2a12986c..21c0e360 100644
--- a/eland/operations.py
+++ b/eland/operations.py
@@ -181,7 +181,7 @@ def _metric_agg_series(
                 dtype = "object"
             return build_pd_series(results, index=results.keys(), dtype=dtype)
 
-    def value_counts(self, query_compiler, es_size):
+    def value_counts(self, query_compiler: "QueryCompiler", es_size: int) -> pd.Series:
         return self._terms_aggs(query_compiler, "terms", es_size)
 
     def hist(self, query_compiler, bins):
@@ -195,12 +195,54 @@ def aggs(self, query_compiler, pd_aggs, numeric_only=None) -> pd.DataFrame:
             results, index=pd_aggs, dtype=(np.float64 if numeric_only else None)
         )
 
+    def mode(
+        self,
+        query_compiler: "QueryCompiler",
+        pd_aggs: List[str],
+        is_dataframe: bool,
+        es_size: int,
+        numeric_only: bool = False,
+        dropna: bool = True,
+    ) -> Union[pd.DataFrame, pd.Series]:
+
+        results = self._metric_aggs(
+            query_compiler,
+            pd_aggs=pd_aggs,
+            numeric_only=numeric_only,
+            dropna=dropna,
+            es_mode_size=es_size,
+        )
+
+        pd_dict: Dict[str, Any] = {}
+        row_diff: Optional[int] = None
+
+        if is_dataframe:
+            # If multiple values of mode is returned for a particular column
+            # find the maximum length and use that to fill dataframe with NaN/NaT
+            rows_len = max([len(value) for value in results.values()])
+            for key, values in results.items():
+                row_diff = rows_len - len(values)
+                # Convert np.ndarray to list
+                values = list(values)
+                if row_diff:
+                    if isinstance(values[0], pd.Timestamp):
+                        values.extend([pd.NaT] * row_diff)
+                    else:
+                        values.extend([np.NaN] * row_diff)
+                pd_dict[key] = values
+
+            return pd.DataFrame(pd_dict)
+        else:
+            return pd.DataFrame(results.values()).iloc[0].rename()
+
     def _metric_aggs(
         self,
         query_compiler: "QueryCompiler",
         pd_aggs: List[str],
         numeric_only: Optional[bool] = None,
         is_dataframe_agg: bool = False,
+        es_mode_size: Optional[int] = None,
+        dropna: bool = True,
     ) -> Dict[str, Any]:
         """
         Used to calculate metric aggregations
@@ -216,6 +258,10 @@ def _metric_aggs(
             return either all numeric values or NaN/NaT
         is_dataframe_agg:
             know if this method is called from single-agg or aggreagation method
+        es_mode_size:
+            number of rows to return when multiple mode values are present.
+        dropna:
+            drop NaN/NaT for a dataframe
 
         Returns
         -------
@@ -252,6 +298,15 @@ def _metric_aggs(
                         es_agg[0],
                         field.aggregatable_es_field_name,
                     )
+                elif es_agg == "mode":
+                    # TODO for dropna=False, Check If field is timestamp or boolean or numeric,
+                    # then use missing parameter for terms aggregation.
+                    body.terms_aggs(
+                        f"{es_agg}_{field.es_field_name}",
+                        "terms",
+                        field.aggregatable_es_field_name,
+                        es_mode_size,
+                    )
                 else:
                     body.metric_aggs(
                         f"{es_agg}_{field.es_field_name}",
@@ -280,7 +335,9 @@ def _metric_aggs(
             is_dataframe_agg=is_dataframe_agg,
         )
 
-    def _terms_aggs(self, query_compiler, func, es_size=None):
+    def _terms_aggs(
+        self, query_compiler: "QueryCompiler", func: str, es_size: int
+    ) -> pd.Series:
         """
         Parameters
         ----------
@@ -499,13 +556,42 @@ def _unpack_metric_aggs(
                             agg_value = np.sqrt(
                                 (count / (count - 1.0)) * agg_value * agg_value
                             )
+                elif es_agg == "mode":
+                    # For terms aggregation buckets are returned
+                    # agg_value will be of type list
+                    agg_value = response["aggregations"][
+                        f"{es_agg}_{field.es_field_name}"
+                    ]["buckets"]
                 else:
                     agg_value = response["aggregations"][
                         f"{es_agg}_{field.es_field_name}"
                     ]["value"]
 
+                if isinstance(agg_value, list):
+                    # include top-terms in the result.
+                    if not agg_value:
+                        # If the all the documents for a field are empty
+                        agg_value = [np.NaN]
+                    else:
+                        max_doc_count = agg_value[0]["doc_count"]
+                        # We need only keys which are equal to max_doc_count
+                        # lesser values are ignored
+                        agg_value = [
+                            agg_value[i]["key"]
+                            for i in range(len(agg_value))
+                            if agg_value[i]["doc_count"] == max_doc_count
+                        ]
+                        # Maintain datatype by default because pandas does the same
+                        # text are returned as-is
+                        if field.is_bool or field.is_numeric:
+                            agg_value = [
+                                field.np_dtype.type(value) for value in agg_value
+                            ]
+
                 # Null usually means there were no results.
-                if agg_value is None or np.isnan(agg_value):
+                if not isinstance(agg_value, list) and (
+                    agg_value is None or np.isnan(agg_value)
+                ):
                     if is_dataframe_agg and not numeric_only:
                         agg_value = np.NaN
                     elif not is_dataframe_agg and numeric_only is False:
@@ -517,13 +603,22 @@ def _unpack_metric_aggs(
 
                 # If this is a non-null timestamp field convert to a pd.Timestamp()
                 elif field.is_timestamp:
-                    agg_value = elasticsearch_date_to_pandas_date(
-                        agg_value, field.es_date_format
-                    )
+                    if isinstance(agg_value, list):
+                        # convert to timestamp results for mode
+                        agg_value = [
+                            elasticsearch_date_to_pandas_date(
+                                value, field.es_date_format
+                            )
+                            for value in agg_value
+                        ]
+                    else:
+                        agg_value = elasticsearch_date_to_pandas_date(
+                            agg_value, field.es_date_format
+                        )
                 # If numeric_only is False | None then maintain column datatype
                 elif not numeric_only:
                     # we're only converting to bool for lossless aggs like min, max, and median.
-                    if pd_agg in {"max", "min", "median", "sum"}:
+                    if pd_agg in {"max", "min", "median", "sum", "mode"}:
                         # 'sum' isn't representable with bool, use int64
                         if pd_agg == "sum" and field.is_bool:
                             agg_value = np.int64(agg_value)
@@ -791,10 +886,15 @@ def _map_pd_aggs_to_es_aggs(pd_aggs):
             elif pd_agg == "median":
                 es_aggs.append(("percentiles", "50.0"))
 
-            # Not implemented
             elif pd_agg == "mode":
-                # We could do this via top term
-                raise NotImplementedError(pd_agg, " not currently implemented")
+                if len(pd_aggs) != 1:
+                    raise NotImplementedError(
+                        "Currently mode is not supported in df.agg[...]. Try df.mode()"
+                    )
+                else:
+                    es_aggs.append("mode")
+
+            # Not implemented
             elif pd_agg == "quantile":
                 # TODO
                 raise NotImplementedError(pd_agg, " not currently implemented")
diff --git a/eland/query.py b/eland/query.py
index 5f7fe2ea..68b89754 100644
--- a/eland/query.py
+++ b/eland/query.py
@@ -101,7 +101,14 @@ def regexp(self, field: str, value: str) -> None:
         else:
             self._query = self._query & Rlike(field, value)
 
-    def terms_aggs(self, name: str, func: str, field: str, es_size: int) -> None:
+    def terms_aggs(
+        self,
+        name: str,
+        func: str,
+        field: str,
+        es_size: Optional[int] = None,
+        missing: Optional[Any] = None,
+    ) -> None:
         """
         Add terms agg e.g
 
@@ -109,12 +116,18 @@ def terms_aggs(self, name: str, func: str, field: str, es_size: int) -> None:
             "name": {
                 "terms": {
                     "field": "Airline",
-                    "size": 10
+                    "size": 10,
+                    "missing": "null"
                 }
             }
         }
         """
-        agg = {func: {"field": field, "size": es_size}}
+        agg = {func: {"field": field}}
+        if es_size:
+            agg[func]["size"] = str(es_size)
+
+        if missing:
+            agg[func]["missing"] = missing
         self._aggs[name] = agg
 
     def metric_aggs(self, name: str, func: str, field: str) -> None:
diff --git a/eland/query_compiler.py b/eland/query_compiler.py
index f3a4b077..5b41f04e 100644
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@@ -621,6 +621,22 @@ def nunique(self):
             self, ["nunique"], numeric_only=False
         )
 
+    def mode(
+        self,
+        es_size: int,
+        numeric_only: bool = False,
+        dropna: bool = True,
+        is_dataframe: bool = True,
+    ) -> Union[pd.DataFrame, pd.Series]:
+        return self._operations.mode(
+            self,
+            pd_aggs=["mode"],
+            numeric_only=numeric_only,
+            dropna=dropna,
+            is_dataframe=is_dataframe,
+            es_size=es_size,
+        )
+
     def aggs_groupby(
         self,
         by: List[str],
@@ -638,7 +654,7 @@ def aggs_groupby(
             numeric_only=numeric_only,
         )
 
-    def value_counts(self, es_size):
+    def value_counts(self, es_size: int) -> pd.Series:
         return self._operations.value_counts(self, es_size)
 
     def es_info(self, buf):
diff --git a/eland/series.py b/eland/series.py
index 020fb67a..c023b647 100644
--- a/eland/series.py
+++ b/eland/series.py
@@ -637,6 +637,48 @@ def filter(
         )
         return Series(_query_compiler=new_query_compiler)
 
+    def mode(self, es_size: int = 10) -> pd.Series:
+        """
+            Calculate mode of a series
+
+        Parameters
+        ----------
+        es_size: default 10
+            number of rows to be returned if mode has multiple values
+
+        See Also
+        --------
+        :pandas_api_docs:`pandas.Series.mode`
+
+        Examples
+        --------
+        >>> ed_ecommerce = ed.DataFrame('localhost', 'ecommerce')
+        >>> ed_ecommerce["day_of_week"].mode()
+        0    Thursday
+        dtype: object
+
+        >>> ed_ecommerce["order_date"].mode()
+        0   2016-12-02 20:36:58
+        1   2016-12-04 23:44:10
+        2   2016-12-08 06:21:36
+        3   2016-12-08 09:38:53
+        4   2016-12-12 11:38:24
+        5   2016-12-12 19:46:34
+        6   2016-12-14 18:00:00
+        7   2016-12-15 11:38:24
+        8   2016-12-22 19:39:22
+        9   2016-12-24 06:21:36
+        dtype: datetime64[ns]
+
+        >>> ed_ecommerce["order_date"].mode(es_size=3)
+        0   2016-12-02 20:36:58
+        1   2016-12-04 23:44:10
+        2   2016-12-08 06:21:36
+        dtype: datetime64[ns]
+
+        """
+        return self._query_compiler.mode(is_dataframe=False, es_size=es_size)
+
     def es_match(
         self,
         text: str,
diff --git a/tests/dataframe/test_groupby_pytest.py b/tests/dataframe/test_groupby_pytest.py
index 9dda3c32..1ba1b81a 100644
--- a/tests/dataframe/test_groupby_pytest.py
+++ b/tests/dataframe/test_groupby_pytest.py
@@ -194,3 +194,9 @@ def test_groupby_dataframe_mad(self):
         assert_index_equal(pd_min_mad.columns, ed_min_mad.columns)
         assert_index_equal(pd_min_mad.index, ed_min_mad.index)
         assert_series_equal(pd_min_mad.dtypes, ed_min_mad.dtypes)
+
+    def test_groupby_mode(self):
+        ed_flights = self.ed_flights()
+        match = "Currently mode is not supported for groupby"
+        with pytest.raises(NotImplementedError, match=match):
+            ed_flights.groupby("Cancelled").mode()
diff --git a/tests/dataframe/test_metrics_pytest.py b/tests/dataframe/test_metrics_pytest.py
index 71050fbb..c067baca 100644
--- a/tests/dataframe/test_metrics_pytest.py
+++ b/tests/dataframe/test_metrics_pytest.py
@@ -426,3 +426,18 @@ def test_aggs_count(self):
         ed_count = ed_flights.agg(["count"])
 
         assert_frame_equal(pd_count, ed_count)
+
+    @pytest.mark.parametrize("numeric_only", [True, False])
+    @pytest.mark.parametrize("es_size", [2, 10, 20])
+    def test_aggs_mode(self, es_size, numeric_only):
+        pd_flights = self.pd_flights().filter(
+            ["Cancelled", "dayOfWeek", "timestamp", "DestCountry"]
+        )
+        ed_flights = self.ed_flights().filter(
+            ["Cancelled", "dayOfWeek", "timestamp", "DestCountry"]
+        )
+
+        pd_mode = pd_flights.mode(numeric_only=numeric_only)[:es_size]
+        ed_mode = ed_flights.mode(numeric_only=numeric_only, es_size=es_size)
+
+        assert_frame_equal(pd_mode, ed_mode)
diff --git a/tests/series/test_metrics_pytest.py b/tests/series/test_metrics_pytest.py
index 35244bab..ad6aacb4 100644
--- a/tests/series/test_metrics_pytest.py
+++ b/tests/series/test_metrics_pytest.py
@@ -22,6 +22,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+from pandas.testing import assert_series_equal
 
 from tests.common import TestData, assert_almost_equal
 
@@ -114,3 +115,25 @@ def test_flights_datetime_median_metric(self):
             <= median
             <= pd.to_datetime("2018-01-01 12:00:00.000")
         )
+
+    @pytest.mark.parametrize(
+        "column", ["day_of_week", "geoip.region_name", "taxful_total_price", "user"]
+    )
+    def test_ecommerce_mode(self, column):
+        ed_series = self.ed_ecommerce()
+        pd_series = self.pd_ecommerce()
+
+        ed_mode = ed_series[column].mode()
+        pd_mode = pd_series[column].mode()
+
+        assert_series_equal(ed_mode, pd_mode)
+
+    @pytest.mark.parametrize("es_size", [2, 10, 20])
+    def test_ecommerce_mode_es_size(self, es_size):
+        ed_series = self.ed_ecommerce()
+        pd_series = self.pd_ecommerce()
+
+        pd_mode = pd_series["order_date"].mode()[:es_size]
+        ed_mode = ed_series["order_date"].mode(es_size)
+
+        assert_series_equal(pd_mode, ed_mode)

From 27f7fd3c0326d585d6937b576aa9f9dd986b0d9a Mon Sep 17 00:00:00 2001
From: "P. Sai Vinay" <pvinay1998@gmail.com>
Date: Tue, 17 Nov 2020 16:07:20 +0530
Subject: [PATCH 2/6] Changes Requested

---
 eland/field_mappings.py                | 11 ++++++-----
 eland/operations.py                    |  9 +++++----
 tests/dataframe/test_metrics_pytest.py |  7 ++++---
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/eland/field_mappings.py b/eland/field_mappings.py
index 9a123248..40b71a1a 100644
--- a/eland/field_mappings.py
+++ b/eland/field_mappings.py
@@ -102,16 +102,17 @@ def is_es_agg_compatible(self, es_agg) -> bool:
         # Except "median_absolute_deviation" which doesn't support bool
         if es_agg == "median_absolute_deviation" and self.is_bool:
             return False
-        # Cardinality and Count work for all types
+        # Cardinality, Count and mode work for all types
         # Numerics and bools work for all aggs
-        if es_agg in ("cardinality", "value_count") or self.is_numeric or self.is_bool:
+        if (
+            es_agg in ("cardinality", "value_count", "mode")
+            or self.is_numeric
+            or self.is_bool
+        ):
             return True
         # Timestamps also work for 'min', 'max' and 'avg'
         if es_agg in {"min", "max", "avg", "percentiles"} and self.is_timestamp:
             return True
-        # All datatypes work for mode
-        if es_agg == "mode":
-            return True
         return False
 
     @property
diff --git a/eland/operations.py b/eland/operations.py
index 21c0e360..a3be8841 100644
--- a/eland/operations.py
+++ b/eland/operations.py
@@ -571,16 +571,17 @@ def _unpack_metric_aggs(
                     # include top-terms in the result.
                     if not agg_value:
                         # If the all the documents for a field are empty
-                        agg_value = [np.NaN]
+                        agg_value = [field.nan_value]
                     else:
                         max_doc_count = agg_value[0]["doc_count"]
                         # We need only keys which are equal to max_doc_count
                         # lesser values are ignored
                         agg_value = [
-                            agg_value[i]["key"]
-                            for i in range(len(agg_value))
-                            if agg_value[i]["doc_count"] == max_doc_count
+                            item["key"]
+                            for item in agg_value
+                            if item["doc_count"] == max_doc_count
                         ]
+
                         # Maintain datatype by default because pandas does the same
                         # text are returned as-is
                         if field.is_bool or field.is_numeric:
diff --git a/tests/dataframe/test_metrics_pytest.py b/tests/dataframe/test_metrics_pytest.py
index c067baca..ce688014 100644
--- a/tests/dataframe/test_metrics_pytest.py
+++ b/tests/dataframe/test_metrics_pytest.py
@@ -428,13 +428,14 @@ def test_aggs_count(self):
         assert_frame_equal(pd_count, ed_count)
 
     @pytest.mark.parametrize("numeric_only", [True, False])
-    @pytest.mark.parametrize("es_size", [2, 10, 20])
+    @pytest.mark.parametrize("es_size", [2, 20, 100, 5000, 3000])
     def test_aggs_mode(self, es_size, numeric_only):
+        # FlightNum has unique values, so we can test `fill` NaN/NaT for remaining columns
         pd_flights = self.pd_flights().filter(
-            ["Cancelled", "dayOfWeek", "timestamp", "DestCountry"]
+            ["Cancelled", "dayOfWeek", "timestamp", "DestCountry", "FlightNum"]
         )
         ed_flights = self.ed_flights().filter(
-            ["Cancelled", "dayOfWeek", "timestamp", "DestCountry"]
+            ["Cancelled", "dayOfWeek", "timestamp", "DestCountry", "FlightNum"]
         )
 
         pd_mode = pd_flights.mode(numeric_only=numeric_only)[:es_size]

From 3af0b39744e59aaf3b391cd2da3083c12c9afc4c Mon Sep 17 00:00:00 2001
From: "P. Sai Vinay" <pvinay1998@gmail.com>
Date: Thu, 31 Dec 2020 15:15:33 +0530
Subject: [PATCH 3/6] Add eland in manifest.in

---
 MANIFEST.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index 345d551a..fe043a09 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,4 @@
 include LICENSE.txt
 include README.md
 include eland/py.typed
+recursive-include eland

From c0e723b1ebe3194863c6c848a5dcee2fcc74244b Mon Sep 17 00:00:00 2001
From: "P. Sai Vinay" <pvinay1998@gmail.com>
Date: Thu, 31 Dec 2020 15:26:55 +0530
Subject: [PATCH 4/6] make tuple into set

---
 eland/field_mappings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eland/field_mappings.py b/eland/field_mappings.py
index 40b71a1a..45724a43 100644
--- a/eland/field_mappings.py
+++ b/eland/field_mappings.py
@@ -105,7 +105,7 @@ def is_es_agg_compatible(self, es_agg) -> bool:
         # Cardinality, Count and mode work for all types
         # Numerics and bools work for all aggs
         if (
-            es_agg in ("cardinality", "value_count", "mode")
+            es_agg in {"cardinality", "value_count", "mode"}
             or self.is_numeric
             or self.is_bool
         ):

From 015e532ef4fe2e007b7d8d8fabe69477a9198530 Mon Sep 17 00:00:00 2001
From: "P. Sai Vinay" <pvinay1998@gmail.com>
Date: Tue, 5 Jan 2021 23:48:35 +0530
Subject: [PATCH 5/6] Requested changes

---
 MANIFEST.in                            | 1 -
 eland/dataframe.py                     | 2 +-
 eland/operations.py                    | 2 +-
 tests/dataframe/test_metrics_pytest.py | 8 ++++++--
 tests/series/test_metrics_pytest.py    | 2 +-
 5 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index fe043a09..345d551a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,3 @@
 include LICENSE.txt
 include README.md
 include eland/py.typed
-recursive-include eland
diff --git a/eland/dataframe.py b/eland/dataframe.py
index fe24ab26..9b8dc4c6 100644
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@@ -1635,7 +1635,7 @@ def mode(
         es_size: int = 10,
     ) -> pd.DataFrame:
         """
-            Calculate mode of a DataFrame
+        Calculate mode of a DataFrame
 
         Parameters
         ----------
diff --git a/eland/operations.py b/eland/operations.py
index a3be8841..c2615512 100644
--- a/eland/operations.py
+++ b/eland/operations.py
@@ -890,7 +890,7 @@ def _map_pd_aggs_to_es_aggs(pd_aggs):
             elif pd_agg == "mode":
                 if len(pd_aggs) != 1:
                     raise NotImplementedError(
-                        "Currently mode is not supported in df.agg[...]. Try df.mode()"
+                        "Currently mode is not supported in df.agg(...). Try df.mode()"
                     )
                 else:
                     es_aggs.append("mode")
diff --git a/tests/dataframe/test_metrics_pytest.py b/tests/dataframe/test_metrics_pytest.py
index ce688014..7a907dd6 100644
--- a/tests/dataframe/test_metrics_pytest.py
+++ b/tests/dataframe/test_metrics_pytest.py
@@ -428,7 +428,7 @@ def test_aggs_count(self):
         assert_frame_equal(pd_count, ed_count)
 
     @pytest.mark.parametrize("numeric_only", [True, False])
-    @pytest.mark.parametrize("es_size", [2, 20, 100, 5000, 3000])
+    @pytest.mark.parametrize("es_size", [1, 2, 20, 100, 5000, 3000])
     def test_aggs_mode(self, es_size, numeric_only):
         # FlightNum has unique values, so we can test `fill` NaN/NaT for remaining columns
         pd_flights = self.pd_flights().filter(
@@ -441,4 +441,8 @@ def test_aggs_mode(self, es_size, numeric_only):
         pd_mode = pd_flights.mode(numeric_only=numeric_only)[:es_size]
         ed_mode = ed_flights.mode(numeric_only=numeric_only, es_size=es_size)
 
-        assert_frame_equal(pd_mode, ed_mode)
+        # Skipping dtype check because eland is giving Cancelled dtype as bool
+        # but pandas is referring it as object
+        assert_frame_equal(
+            pd_mode, ed_mode, check_dtype=(False if es_size == 1 else True)
+        )
diff --git a/tests/series/test_metrics_pytest.py b/tests/series/test_metrics_pytest.py
index ad6aacb4..c471d97e 100644
--- a/tests/series/test_metrics_pytest.py
+++ b/tests/series/test_metrics_pytest.py
@@ -128,7 +128,7 @@ def test_ecommerce_mode(self, column):
 
         assert_series_equal(ed_mode, pd_mode)
 
-    @pytest.mark.parametrize("es_size", [2, 10, 20])
+    @pytest.mark.parametrize("es_size", [1, 2, 10, 20])
     def test_ecommerce_mode_es_size(self, es_size):
         ed_series = self.ed_ecommerce()
         pd_series = self.pd_ecommerce()

From 0c0a1a08be03707a7f0dee06f78e4b3482810565 Mon Sep 17 00:00:00 2001
From: "P. Sai Vinay" <pvinay1998@gmail.com>
Date: Wed, 6 Jan 2021 10:53:50 +0530
Subject: [PATCH 6/6] Add missing files

---
 docs/sphinx/reference/api/eland.DataFrame.mode.rst | 6 ++++++
 docs/sphinx/reference/api/eland.Series.mode.rst    | 6 ++++++
 2 files changed, 12 insertions(+)
 create mode 100644 docs/sphinx/reference/api/eland.DataFrame.mode.rst
 create mode 100644 docs/sphinx/reference/api/eland.Series.mode.rst

diff --git a/docs/sphinx/reference/api/eland.DataFrame.mode.rst b/docs/sphinx/reference/api/eland.DataFrame.mode.rst
new file mode 100644
index 00000000..56c55aaa
--- /dev/null
+++ b/docs/sphinx/reference/api/eland.DataFrame.mode.rst
@@ -0,0 +1,6 @@
+eland.DataFrame.mode
+====================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.mode
\ No newline at end of file
diff --git a/docs/sphinx/reference/api/eland.Series.mode.rst b/docs/sphinx/reference/api/eland.Series.mode.rst
new file mode 100644
index 00000000..866f5535
--- /dev/null
+++ b/docs/sphinx/reference/api/eland.Series.mode.rst
@@ -0,0 +1,6 @@
+eland.Series.mode
+====================
+
+.. currentmodule:: eland
+
+.. automethod:: Series.mode
\ No newline at end of file