REFACTOR-modin-project#6852: Remove OrderedDict in favor of builtin dict

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
anmyachev · Jan 10, 2024 · 2263f02
1 parent 31f8bd0
commit 2263f02
Show file tree

Hide file tree

Showing 10 changed files with 46 additions and 61 deletions.
diff --git a/examples/docker/modin-hdk/plasticc-hdk.py b/examples/docker/modin-hdk/plasticc-hdk.py
@@ -12,7 +12,6 @@
 # governing permissions and limitations under the License.
 
 import sys
-from collections import OrderedDict
 from functools import partial
 
 import numpy as np
@@ -23,7 +22,7 @@
 
 ################ helper functions ###############################
 def create_dtypes():
-    dtypes = OrderedDict(
+    dtypes = dict(
         [
             ("object_id", "int32"),
             ("mjd", "float32"),
@@ -50,7 +49,7 @@ def create_dtypes():
         "target",
     ]
     meta_dtypes = ["int32"] + ["float32"] * 4 + ["int32"] + ["float32"] * 5 + ["int32"]
-    meta_dtypes = OrderedDict(
+    meta_dtypes = dict(
         [(columns_names[i], meta_dtypes[i]) for i in range(len(meta_dtypes))]
     )
     return dtypes, meta_dtypes

diff --git a/examples/docker/modin-ray/plasticc.py b/examples/docker/modin-ray/plasticc.py
@@ -13,7 +13,6 @@
 
 import sys
 import time
-from collections import OrderedDict
 from functools import partial
 
 import numpy as np
@@ -29,7 +28,7 @@
 
 ################ helper functions ###############################
 def create_dtypes():
-    dtypes = OrderedDict(
+    dtypes = dict(
         [
             ("object_id", "int32"),
             ("mjd", "float32"),
@@ -56,7 +55,7 @@ def create_dtypes():
         "target",
     ]
     meta_dtypes = ["int32"] + ["float32"] * 4 + ["int32"] + ["float32"] * 5 + ["int32"]
-    meta_dtypes = OrderedDict(
+    meta_dtypes = dict(
         [(columns_names[i], meta_dtypes[i]) for i in range(len(meta_dtypes))]
     )
     return dtypes, meta_dtypes

diff --git a/modin/core/dataframe/pandas/dataframe/dataframe.py b/modin/core/dataframe/pandas/dataframe/dataframe.py
@@ -18,7 +18,6 @@
 for pandas storage format.
 """
 import datetime
-from collections import OrderedDict
 from typing import TYPE_CHECKING, Callable, Dict, Hashable, List, Optional, Union
 
 import numpy as np
@@ -1686,7 +1685,7 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
 
         Returns
         -------
-        OrderedDict
+        dict
             A mapping from partition index to list of internal indices which correspond to `indices` in each
             partition.
         """
@@ -1700,33 +1699,31 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
             # Converting range-like indexer to slice
             indices = slice(indices.start, indices.stop, indices.step)
             if is_full_grab_slice(indices, sequence_len=len(self.get_axis(axis))):
-                return OrderedDict(
+                return dict(
                     zip(
                         range(self._partitions.shape[axis]),
                         [slice(None)] * self._partitions.shape[axis],
                     )
                 )
             # Empty selection case
             if indices.start == indices.stop and indices.start is not None:
-                return OrderedDict()
+                return dict()
             if indices.start is None or indices.start == 0:
                 last_part, last_idx = list(
                     self._get_dict_of_block_index(axis, [indices.stop]).items()
                 )[0]
-                dict_of_slices = OrderedDict(
-                    zip(range(last_part), [slice(None)] * last_part)
-                )
+                dict_of_slices = dict(zip(range(last_part), [slice(None)] * last_part))
                 dict_of_slices.update({last_part: slice(last_idx[0])})
                 return dict_of_slices
             elif indices.stop is None or indices.stop >= len(self.get_axis(axis)):
                 first_part, first_idx = list(
                     self._get_dict_of_block_index(axis, [indices.start]).items()
                 )[0]
-                dict_of_slices = OrderedDict({first_part: slice(first_idx[0], None)})
+                dict_of_slices = dict({first_part: slice(first_idx[0], None)})
                 num_partitions = np.size(self._partitions, axis=axis)
                 part_list = range(first_part + 1, num_partitions)
                 dict_of_slices.update(
-                    OrderedDict(zip(part_list, [slice(None)] * len(part_list)))
+                    dict(zip(part_list, [slice(None)] * len(part_list)))
                 )
                 return dict_of_slices
             else:
@@ -1737,23 +1734,21 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
                     self._get_dict_of_block_index(axis, [indices.stop]).items()
                 )[0]
                 if first_part == last_part:
-                    return OrderedDict({first_part: slice(first_idx[0], last_idx[0])})
+                    return dict({first_part: slice(first_idx[0], last_idx[0])})
                 else:
                     if last_part - first_part == 1:
-                        return OrderedDict(
+                        return dict(
                             # FIXME: this dictionary creation feels wrong - it might not maintain the order
                             {
                                 first_part: slice(first_idx[0], None),
                                 last_part: slice(None, last_idx[0]),
                             }
                         )
                     else:
-                        dict_of_slices = OrderedDict(
-                            {first_part: slice(first_idx[0], None)}
-                        )
+                        dict_of_slices = dict({first_part: slice(first_idx[0], None)})
                         part_list = range(first_part + 1, last_part)
                         dict_of_slices.update(
-                            OrderedDict(zip(part_list, [slice(None)] * len(part_list)))
+                            dict(zip(part_list, [slice(None)] * len(part_list)))
                         )
                         dict_of_slices.update({last_part: slice(None, last_idx[0])})
                         return dict_of_slices
@@ -1765,7 +1760,7 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
             # This will help preserve metadata stored in empty dataframes (indexes and dtypes)
             # Otherwise, we will get an empty `new_partitions` array, from which it will
             #  no longer be possible to obtain metadata
-            return OrderedDict([(0, np.array([], dtype=np.int64))])
+            return dict([(0, np.array([], dtype=np.int64))])
         negative_mask = np.less(indices, 0)
         has_negative = np.any(negative_mask)
         if has_negative:
@@ -1827,7 +1822,7 @@ def internal(block_idx: int, global_index):
             for i in range(1, len(count_for_each_partition))
             if count_for_each_partition[i] > count_for_each_partition[i - 1]
         ]
-        return OrderedDict(partition_ids_with_indices)
+        return dict(partition_ids_with_indices)
 
     @staticmethod
     def _join_index_objects(axis, indexes, how, sort):

diff --git a/modin/core/io/io.py b/modin/core/io/io.py
@@ -17,7 +17,6 @@
 `BaseIO` is base class for IO classes, that stores IO functions.
 """
 
-from collections import OrderedDict
 from typing import Any
 
 import pandas
@@ -273,8 +272,8 @@ def read_clipboard(cls, sep=r"\s+", **kwargs):  # pragma: no cover # noqa: PR01
     @doc(
         _doc_default_io_method,
         summary="Read an Excel file into query compiler",
-        returns="""BaseQueryCompiler or dict/OrderedDict :
-    QueryCompiler or OrderedDict/dict with read data.""",
+        returns="""BaseQueryCompiler or dict :
+    QueryCompiler or dict with read data.""",
     )
     def read_excel(cls, **kwargs):  # noqa: PR01
         ErrorMessage.default_to_pandas("`read_excel`")
@@ -285,7 +284,7 @@ def read_excel(cls, **kwargs):  # noqa: PR01
             # pd.ExcelFile in `read_excel` isn't supported
             kwargs["io"]._set_pandas_mode()
         intermediate = pandas.read_excel(**kwargs)
-        if isinstance(intermediate, (OrderedDict, dict)):
+        if isinstance(intermediate, dict):
             parsed = type(intermediate)()
             for key in intermediate.keys():
                 parsed[key] = cls.from_pandas(intermediate.get(key))

diff --git a/modin/core/storage_formats/cudf/parser.py b/modin/core/storage_formats/cudf/parser.py
@@ -12,7 +12,6 @@
 # governing permissions and limitations under the License.
 
 import warnings
-from collections import OrderedDict
 from io import BytesIO
 
 import numpy as np
@@ -83,7 +82,7 @@ def single_worker_read(cls, fname, *, reason, **kwargs):
                 )
             )
             return pandas_frame
-        elif isinstance(pandas_frame, (OrderedDict, dict)):
+        elif isinstance(pandas_frame, dict):
             return {
                 i: cls.query_compiler_cls.from_pandas(frame, cls.frame_cls)
                 for i, frame in pandas_frame.items()

diff --git a/modin/core/storage_formats/pandas/parsers.py b/modin/core/storage_formats/pandas/parsers.py
@@ -43,7 +43,6 @@
 import json
 import os
 import warnings
-from collections import OrderedDict
 from io import BytesIO, IOBase, TextIOWrapper
 from typing import Any, NamedTuple
 
@@ -313,7 +312,7 @@ def single_worker_read(cls, fname, *args, reason: str, **kwargs):
                 )
             )
             return pandas_frame
-        elif isinstance(pandas_frame, (OrderedDict, dict)):
+        elif isinstance(pandas_frame, dict):
             return {
                 i: cls.query_compiler_cls.from_pandas(frame, cls.frame_cls)
                 for i, frame in pandas_frame.items()

diff --git a/modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe/dataframe.py b/modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe/dataframe.py
@@ -14,7 +14,6 @@
 """Module provides ``HdkOnNativeDataframe`` class implementing lazy frame."""
 
 import re
-from collections import OrderedDict
 from typing import Hashable, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
@@ -464,7 +463,7 @@ def take_2d_labels_or_positional(
             # Sort by the rowid column
             base = base.copy(op=SortNode(base, [rowid_col], [False], "last"))
             # Remove the rowid column
-            exprs = OrderedDict()
+            exprs = dict()
             for col in table_cols:
                 exprs[col] = base.ref(col)
             base = base.copy(
@@ -614,7 +613,7 @@ def generate_by_name(by):
             else:
                 return by
 
-        exprs = OrderedDict(
+        exprs = dict(
             ((generate_by_name(col), by_frame.ref(col)) for col in groupby_cols)
         )
         groupby_cols = list(exprs.keys())
@@ -647,7 +646,7 @@ def generate_by_name(by):
 
         new_dtypes = base._dtypes[groupby_cols].tolist()
 
-        agg_exprs = OrderedDict()
+        agg_exprs = dict()
         if isinstance(agg, str):
             col_to_ref = {col: base.ref(col) for col in agg_cols}
             self._add_agg_exprs(agg, col_to_ref, kwargs, agg_exprs)
@@ -799,7 +798,7 @@ def agg(self, agg):
         """
         assert isinstance(agg, str)
 
-        agg_exprs = OrderedDict()
+        agg_exprs = dict()
         for col in self.columns:
             agg_exprs[col] = AggregateExpr(agg, self.ref(col))
 
@@ -1089,7 +1088,7 @@ def join(
             if isinstance(self._op, FrameNode):
                 other = self.copy()
             else:
-                exprs = OrderedDict((c, self.ref(c)) for c in self._table_cols)
+                exprs = dict((c, self.ref(c)) for c in self._table_cols)
                 other = self.__constructor__(
                     columns=self.columns,
                     dtypes=self._dtypes_for_exprs(exprs),
@@ -1129,7 +1128,7 @@ def join(
         else:
             ignore_index = True
             index_cols = None
-            exprs = OrderedDict()
+            exprs = dict()
             new_dtypes = []
 
             new_columns, left_renamer, right_renamer = join_columns(
@@ -1235,7 +1234,7 @@ def _union_all(
             The new frame.
         """
         index_cols = None
-        col_name_to_dtype = OrderedDict()
+        col_name_to_dtype = dict()
         for col in self.columns:
             col_name_to_dtype[col] = self._dtypes[col]
 
@@ -1287,7 +1286,7 @@ def _union_all(
                         )
 
             if sort:
-                col_name_to_dtype = OrderedDict(
+                col_name_to_dtype = dict(
                     (col, col_name_to_dtype[col]) for col in sorted(col_name_to_dtype)
                 )
 
@@ -1308,7 +1307,7 @@ def _union_all(
                     or any(frame_dtypes.index != dtypes.index)
                     or any(frame_dtypes.values != dtypes.values)
                 ):
-                    exprs = OrderedDict()
+                    exprs = dict()
                     uses_rowid = False
                     for col in table_col_name_to_dtype:
                         if col in frame_dtypes:
@@ -1785,7 +1784,7 @@ def sort_rows(self, columns, ascending, ignore_index, na_position):
                     drop_index_cols_after = None
 
                 if drop_index_cols_before:
-                    exprs = OrderedDict()
+                    exprs = dict()
                     index_cols = (
                         drop_index_cols_after if drop_index_cols_after else None
                     )
@@ -1810,7 +1809,7 @@ def sort_rows(self, columns, ascending, ignore_index, na_position):
                 )
 
                 if drop_index_cols_after:
-                    exprs = OrderedDict()
+                    exprs = dict()
                     for col in base.columns:
                         exprs[col] = base.ref(col)
                     base = base.__constructor__(
@@ -1950,7 +1949,7 @@ def _materialize_rowid(self):
         """
         name = self._index_cache.get().name if self.has_materialized_index else None
         name = mangle_index_names([name])[0]
-        exprs = OrderedDict()
+        exprs = dict()
         exprs[name] = self.ref(ROWID_COL_NAME)
         for col in self._table_cols:
             exprs[col] = self.ref(col)
@@ -1974,7 +1973,7 @@ def _index_exprs(self):
         -------
         dict
         """
-        exprs = OrderedDict()
+        exprs = dict()
         if self._index_cols:
             for col in self._index_cols:
                 exprs[col] = self.ref(col)
@@ -2290,7 +2289,7 @@ def reset_index(self, drop):
             The new frame.
         """
         if drop:
-            exprs = OrderedDict()
+            exprs = dict()
             for c in self.columns:
                 exprs[c] = self.ref(c)
             return self.__constructor__(
@@ -2306,7 +2305,7 @@ def reset_index(self, drop):
                     "default index reset with no drop is not supported"
                 )
             # Need to demangle index names.
-            exprs = OrderedDict()
+            exprs = dict()
             for i, c in enumerate(self._index_cols):
                 name = ColNameCodec.demangle_index_name(c)
                 if name is None:
@@ -2542,7 +2541,7 @@ def set_index_name(self, name):
             return self
 
         names = mangle_index_names([name])
-        exprs = OrderedDict()
+        exprs = dict()
         if self._index_cols is None:
             exprs[names[0]] = self.ref(ROWID_COL_NAME)
         else:
@@ -2597,7 +2596,7 @@ def set_index_names(self, names):
             )
 
         names = mangle_index_names(names)
-        exprs = OrderedDict()
+        exprs = dict()
         for old, new in zip(self._index_cols, names):
             exprs[new] = self.ref(old)
         for col in self.columns: