Skip to content

Commit

Permalink
REFACTOR-modin-project#6852: Remove OrderedDict in favor of builtin dict
Browse files (browse the repository at this point in the history)
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev committed Jan 10, 2024
1 parent 31f8bd0 commit 2263f02
Show file tree
Hide file tree
Showing 10 changed files with 46 additions and 61 deletions.
5 changes: 2 additions & 3 deletions examples/docker/modin-hdk/plasticc-hdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# governing permissions and limitations under the License.

import sys
from collections import OrderedDict
from functools import partial

import numpy as np
Expand All @@ -23,7 +22,7 @@

################ helper functions ###############################
def create_dtypes():
dtypes = OrderedDict(
dtypes = dict(
[
("object_id", "int32"),
("mjd", "float32"),
Expand All @@ -50,7 +49,7 @@ def create_dtypes():
"target",
]
meta_dtypes = ["int32"] + ["float32"] * 4 + ["int32"] + ["float32"] * 5 + ["int32"]
meta_dtypes = OrderedDict(
meta_dtypes = dict(
[(columns_names[i], meta_dtypes[i]) for i in range(len(meta_dtypes))]
)
return dtypes, meta_dtypes
Expand Down
5 changes: 2 additions & 3 deletions examples/docker/modin-ray/plasticc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

import sys
import time
from collections import OrderedDict
from functools import partial

import numpy as np
Expand All @@ -29,7 +28,7 @@

################ helper functions ###############################
def create_dtypes():
dtypes = OrderedDict(
dtypes = dict(
[
("object_id", "int32"),
("mjd", "float32"),
Expand All @@ -56,7 +55,7 @@ def create_dtypes():
"target",
]
meta_dtypes = ["int32"] + ["float32"] * 4 + ["int32"] + ["float32"] * 5 + ["int32"]
meta_dtypes = OrderedDict(
meta_dtypes = dict(
[(columns_names[i], meta_dtypes[i]) for i in range(len(meta_dtypes))]
)
return dtypes, meta_dtypes
Expand Down
29 changes: 12 additions & 17 deletions modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
for pandas storage format.
"""
import datetime
from collections import OrderedDict
from typing import TYPE_CHECKING, Callable, Dict, Hashable, List, Optional, Union

import numpy as np
Expand Down Expand Up @@ -1686,7 +1685,7 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
Returns
-------
OrderedDict
dict
A mapping from partition index to list of internal indices which correspond to `indices` in each
partition.
"""
Expand All @@ -1700,33 +1699,31 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
# Converting range-like indexer to slice
indices = slice(indices.start, indices.stop, indices.step)
if is_full_grab_slice(indices, sequence_len=len(self.get_axis(axis))):
return OrderedDict(
return dict(
zip(
range(self._partitions.shape[axis]),
[slice(None)] * self._partitions.shape[axis],
)
)
# Empty selection case
if indices.start == indices.stop and indices.start is not None:
return OrderedDict()
return dict()
if indices.start is None or indices.start == 0:
last_part, last_idx = list(
self._get_dict_of_block_index(axis, [indices.stop]).items()
)[0]
dict_of_slices = OrderedDict(
zip(range(last_part), [slice(None)] * last_part)
)
dict_of_slices = dict(zip(range(last_part), [slice(None)] * last_part))
dict_of_slices.update({last_part: slice(last_idx[0])})
return dict_of_slices
elif indices.stop is None or indices.stop >= len(self.get_axis(axis)):
first_part, first_idx = list(
self._get_dict_of_block_index(axis, [indices.start]).items()
)[0]
dict_of_slices = OrderedDict({first_part: slice(first_idx[0], None)})
dict_of_slices = dict({first_part: slice(first_idx[0], None)})
num_partitions = np.size(self._partitions, axis=axis)
part_list = range(first_part + 1, num_partitions)
dict_of_slices.update(
OrderedDict(zip(part_list, [slice(None)] * len(part_list)))
dict(zip(part_list, [slice(None)] * len(part_list)))
)
return dict_of_slices
else:
Expand All @@ -1737,23 +1734,21 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
self._get_dict_of_block_index(axis, [indices.stop]).items()
)[0]
if first_part == last_part:
return OrderedDict({first_part: slice(first_idx[0], last_idx[0])})
return dict({first_part: slice(first_idx[0], last_idx[0])})
else:
if last_part - first_part == 1:
return OrderedDict(
return dict(
# FIXME: this dictionary creation feels wrong - it might not maintain the order
{
first_part: slice(first_idx[0], None),
last_part: slice(None, last_idx[0]),
}
)
else:
dict_of_slices = OrderedDict(
{first_part: slice(first_idx[0], None)}
)
dict_of_slices = dict({first_part: slice(first_idx[0], None)})
part_list = range(first_part + 1, last_part)
dict_of_slices.update(
OrderedDict(zip(part_list, [slice(None)] * len(part_list)))
dict(zip(part_list, [slice(None)] * len(part_list)))
)
dict_of_slices.update({last_part: slice(None, last_idx[0])})
return dict_of_slices
Expand All @@ -1765,7 +1760,7 @@ def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False):
# This will help preserve metadata stored in empty dataframes (indexes and dtypes)
# Otherwise, we will get an empty `new_partitions` array, from which it will
# no longer be possible to obtain metadata
return OrderedDict([(0, np.array([], dtype=np.int64))])
return dict([(0, np.array([], dtype=np.int64))])
negative_mask = np.less(indices, 0)
has_negative = np.any(negative_mask)
if has_negative:
Expand Down Expand Up @@ -1827,7 +1822,7 @@ def internal(block_idx: int, global_index):
for i in range(1, len(count_for_each_partition))
if count_for_each_partition[i] > count_for_each_partition[i - 1]
]
return OrderedDict(partition_ids_with_indices)
return dict(partition_ids_with_indices)

@staticmethod
def _join_index_objects(axis, indexes, how, sort):
Expand Down
7 changes: 3 additions & 4 deletions modin/core/io/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
`BaseIO` is base class for IO classes, that stores IO functions.
"""

from collections import OrderedDict
from typing import Any

import pandas
Expand Down Expand Up @@ -273,8 +272,8 @@ def read_clipboard(cls, sep=r"\s+", **kwargs): # pragma: no cover # noqa: PR01
@doc(
_doc_default_io_method,
summary="Read an Excel file into query compiler",
returns="""BaseQueryCompiler or dict/OrderedDict :
QueryCompiler or OrderedDict/dict with read data.""",
returns="""BaseQueryCompiler or dict :
QueryCompiler or dict with read data.""",
)
def read_excel(cls, **kwargs): # noqa: PR01
ErrorMessage.default_to_pandas("`read_excel`")
Expand All @@ -285,7 +284,7 @@ def read_excel(cls, **kwargs): # noqa: PR01
# pd.ExcelFile in `read_excel` isn't supported
kwargs["io"]._set_pandas_mode()
intermediate = pandas.read_excel(**kwargs)
if isinstance(intermediate, (OrderedDict, dict)):
if isinstance(intermediate, dict):
parsed = type(intermediate)()
for key in intermediate.keys():
parsed[key] = cls.from_pandas(intermediate.get(key))
Expand Down
3 changes: 1 addition & 2 deletions modin/core/storage_formats/cudf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# governing permissions and limitations under the License.

import warnings
from collections import OrderedDict
from io import BytesIO

import numpy as np
Expand Down Expand Up @@ -83,7 +82,7 @@ def single_worker_read(cls, fname, *, reason, **kwargs):
)
)
return pandas_frame
elif isinstance(pandas_frame, (OrderedDict, dict)):
elif isinstance(pandas_frame, dict):
return {
i: cls.query_compiler_cls.from_pandas(frame, cls.frame_cls)
for i, frame in pandas_frame.items()
Expand Down
3 changes: 1 addition & 2 deletions modin/core/storage_formats/pandas/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import json
import os
import warnings
from collections import OrderedDict
from io import BytesIO, IOBase, TextIOWrapper
from typing import Any, NamedTuple

Expand Down Expand Up @@ -313,7 +312,7 @@ def single_worker_read(cls, fname, *args, reason: str, **kwargs):
)
)
return pandas_frame
elif isinstance(pandas_frame, (OrderedDict, dict)):
elif isinstance(pandas_frame, dict):
return {
i: cls.query_compiler_cls.from_pandas(frame, cls.frame_cls)
for i, frame in pandas_frame.items()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
"""Module provides ``HdkOnNativeDataframe`` class implementing lazy frame."""

import re
from collections import OrderedDict
from typing import Hashable, Iterable, List, Optional, Tuple, Union

import numpy as np
Expand Down Expand Up @@ -464,7 +463,7 @@ def take_2d_labels_or_positional(
# Sort by the rowid column
base = base.copy(op=SortNode(base, [rowid_col], [False], "last"))
# Remove the rowid column
exprs = OrderedDict()
exprs = dict()
for col in table_cols:
exprs[col] = base.ref(col)
base = base.copy(
Expand Down Expand Up @@ -614,7 +613,7 @@ def generate_by_name(by):
else:
return by

exprs = OrderedDict(
exprs = dict(
((generate_by_name(col), by_frame.ref(col)) for col in groupby_cols)
)
groupby_cols = list(exprs.keys())
Expand Down Expand Up @@ -647,7 +646,7 @@ def generate_by_name(by):

new_dtypes = base._dtypes[groupby_cols].tolist()

agg_exprs = OrderedDict()
agg_exprs = dict()
if isinstance(agg, str):
col_to_ref = {col: base.ref(col) for col in agg_cols}
self._add_agg_exprs(agg, col_to_ref, kwargs, agg_exprs)
Expand Down Expand Up @@ -799,7 +798,7 @@ def agg(self, agg):
"""
assert isinstance(agg, str)

agg_exprs = OrderedDict()
agg_exprs = dict()
for col in self.columns:
agg_exprs[col] = AggregateExpr(agg, self.ref(col))

Expand Down Expand Up @@ -1089,7 +1088,7 @@ def join(
if isinstance(self._op, FrameNode):
other = self.copy()
else:
exprs = OrderedDict((c, self.ref(c)) for c in self._table_cols)
exprs = dict((c, self.ref(c)) for c in self._table_cols)
other = self.__constructor__(
columns=self.columns,
dtypes=self._dtypes_for_exprs(exprs),
Expand Down Expand Up @@ -1129,7 +1128,7 @@ def join(
else:
ignore_index = True
index_cols = None
exprs = OrderedDict()
exprs = dict()
new_dtypes = []

new_columns, left_renamer, right_renamer = join_columns(
Expand Down Expand Up @@ -1235,7 +1234,7 @@ def _union_all(
The new frame.
"""
index_cols = None
col_name_to_dtype = OrderedDict()
col_name_to_dtype = dict()
for col in self.columns:
col_name_to_dtype[col] = self._dtypes[col]

Expand Down Expand Up @@ -1287,7 +1286,7 @@ def _union_all(
)

if sort:
col_name_to_dtype = OrderedDict(
col_name_to_dtype = dict(
(col, col_name_to_dtype[col]) for col in sorted(col_name_to_dtype)
)

Expand All @@ -1308,7 +1307,7 @@ def _union_all(
or any(frame_dtypes.index != dtypes.index)
or any(frame_dtypes.values != dtypes.values)
):
exprs = OrderedDict()
exprs = dict()
uses_rowid = False
for col in table_col_name_to_dtype:
if col in frame_dtypes:
Expand Down Expand Up @@ -1785,7 +1784,7 @@ def sort_rows(self, columns, ascending, ignore_index, na_position):
drop_index_cols_after = None

if drop_index_cols_before:
exprs = OrderedDict()
exprs = dict()
index_cols = (
drop_index_cols_after if drop_index_cols_after else None
)
Expand All @@ -1810,7 +1809,7 @@ def sort_rows(self, columns, ascending, ignore_index, na_position):
)

if drop_index_cols_after:
exprs = OrderedDict()
exprs = dict()
for col in base.columns:
exprs[col] = base.ref(col)
base = base.__constructor__(
Expand Down Expand Up @@ -1950,7 +1949,7 @@ def _materialize_rowid(self):
"""
name = self._index_cache.get().name if self.has_materialized_index else None
name = mangle_index_names([name])[0]
exprs = OrderedDict()
exprs = dict()
exprs[name] = self.ref(ROWID_COL_NAME)
for col in self._table_cols:
exprs[col] = self.ref(col)
Expand All @@ -1974,7 +1973,7 @@ def _index_exprs(self):
-------
dict
"""
exprs = OrderedDict()
exprs = dict()
if self._index_cols:
for col in self._index_cols:
exprs[col] = self.ref(col)
Expand Down Expand Up @@ -2290,7 +2289,7 @@ def reset_index(self, drop):
The new frame.
"""
if drop:
exprs = OrderedDict()
exprs = dict()
for c in self.columns:
exprs[c] = self.ref(c)
return self.__constructor__(
Expand All @@ -2306,7 +2305,7 @@ def reset_index(self, drop):
"default index reset with no drop is not supported"
)
# Need to demangle index names.
exprs = OrderedDict()
exprs = dict()
for i, c in enumerate(self._index_cols):
name = ColNameCodec.demangle_index_name(c)
if name is None:
Expand Down Expand Up @@ -2542,7 +2541,7 @@ def set_index_name(self, name):
return self

names = mangle_index_names([name])
exprs = OrderedDict()
exprs = dict()
if self._index_cols is None:
exprs[names[0]] = self.ref(ROWID_COL_NAME)
else:
Expand Down Expand Up @@ -2597,7 +2596,7 @@ def set_index_names(self, names):
)

names = mangle_index_names(names)
exprs = OrderedDict()
exprs = dict()
for old, new in zip(self._index_cols, names):
exprs[new] = self.ref(old)
for col in self.columns:
Expand Down

0 comments on commit 2263f02

Please sign in to comment.