/
compat.py
219 lines (173 loc) · 7.08 KB
/
compat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# pylint: disable= invalid-name, unused-import
"""For compatibility and optional dependencies."""
import importlib.util
import logging
import sys
import types
from typing import Any, Dict, List, Optional, Sequence, cast
import numpy as np
from ._typing import _T
assert sys.version_info[0] == 3, "Python 2 is no longer supported."
def py_str(x: bytes) -> str:
"""convert c string back to python string"""
return x.decode("utf-8") # type: ignore
def lazy_isinstance(instance: Any, module: str, name: str) -> bool:
"""Use string representation to identify a type."""
# Notice, we use .__class__ as opposed to type() in order
# to support object proxies such as weakref.proxy
cls = instance.__class__
is_same_module = cls.__module__ == module
has_same_name = cls.__name__ == name
return is_same_module and has_same_name
# pandas
try:
from pandas import DataFrame, MultiIndex, Series
from pandas import concat as pandas_concat
PANDAS_INSTALLED = True
except ImportError:
MultiIndex = object
DataFrame = object
Series = object
pandas_concat = None
PANDAS_INSTALLED = False
# sklearn
try:
from sklearn.base import BaseEstimator as XGBModelBase
from sklearn.base import ClassifierMixin as XGBClassifierBase
from sklearn.base import RegressorMixin as XGBRegressorBase
from sklearn.preprocessing import LabelEncoder
try:
from sklearn.model_selection import KFold as XGBKFold
from sklearn.model_selection import StratifiedKFold as XGBStratifiedKFold
except ImportError:
from sklearn.cross_validation import KFold as XGBKFold
from sklearn.cross_validation import StratifiedKFold as XGBStratifiedKFold
SKLEARN_INSTALLED = True
except ImportError:
SKLEARN_INSTALLED = False
# used for compatibility without sklearn
XGBModelBase = object
XGBClassifierBase = object
XGBRegressorBase = object
LabelEncoder = object
XGBKFold = None
XGBStratifiedKFold = None
_logger = logging.getLogger(__name__)
def is_cudf_available() -> bool:
"""Check cuDF package available or not"""
if importlib.util.find_spec("cudf") is None:
return False
try:
import cudf
return True
except ImportError:
_logger.exception("Importing cuDF failed, use DMatrix instead of QDM")
return False
class XGBoostLabelEncoder(LabelEncoder):
"""Label encoder with JSON serialization methods."""
def to_json(self) -> Dict:
"""Returns a JSON compatible dictionary"""
meta = {}
for k, v in self.__dict__.items():
if isinstance(v, np.ndarray):
meta[k] = v.tolist()
else:
meta[k] = v
return meta
def from_json(self, doc: Dict) -> None:
# pylint: disable=attribute-defined-outside-init
"""Load the encoder back from a JSON compatible dict."""
meta = {}
for k, v in doc.items():
if k == "classes_":
self.classes_ = np.array(v)
continue
meta[k] = v
self.__dict__.update(meta)
try:
import scipy.sparse as scipy_sparse
from scipy.sparse import csr_matrix as scipy_csr
except ImportError:
scipy_sparse = False
scipy_csr = object
def concat(value: Sequence[_T]) -> _T: # pylint: disable=too-many-return-statements
"""Concatenate row-wise."""
if isinstance(value[0], np.ndarray):
value_arr = cast(Sequence[np.ndarray], value)
return np.concatenate(value_arr, axis=0)
if scipy_sparse and isinstance(value[0], scipy_sparse.csr_matrix):
return scipy_sparse.vstack(value, format="csr")
if scipy_sparse and isinstance(value[0], scipy_sparse.csc_matrix):
return scipy_sparse.vstack(value, format="csc")
if scipy_sparse and isinstance(value[0], scipy_sparse.spmatrix):
# other sparse format will be converted to CSR.
return scipy_sparse.vstack(value, format="csr")
if PANDAS_INSTALLED and isinstance(value[0], (DataFrame, Series)):
return pandas_concat(value, axis=0)
if lazy_isinstance(value[0], "cudf.core.dataframe", "DataFrame") or lazy_isinstance(
value[0], "cudf.core.series", "Series"
):
from cudf import concat as CUDF_concat # pylint: disable=import-error
return CUDF_concat(value, axis=0)
from .data import _is_cupy_array
if _is_cupy_array(value[0]):
import cupy # pylint: disable=import-error
# pylint: disable=c-extension-no-member,no-member
d = cupy.cuda.runtime.getDevice()
for v in value:
arr = cast(cupy.ndarray, v)
d_v = arr.device.id
assert d_v == d, "Concatenating arrays on different devices."
return cupy.concatenate(value, axis=0)
raise TypeError("Unknown type.")
# Modified from tensorflow with added caching. There's a `LazyLoader` in
# `importlib.utils`, except it's unclear from its document on how to use it. This one
# seems to be easy to understand and works out of box.
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
# file except in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the specific language governing
# permissions and limitations under the License.
class LazyLoader(types.ModuleType):
"""Lazily import a module, mainly to avoid pulling in large dependencies."""
def __init__(
self,
local_name: str,
parent_module_globals: Dict,
name: str,
warning: Optional[str] = None,
) -> None:
self._local_name = local_name
self._parent_module_globals = parent_module_globals
self._warning = warning
self.module: Optional[types.ModuleType] = None
super().__init__(name)
def _load(self) -> types.ModuleType:
"""Load the module and insert it into the parent's globals."""
# Import the target module and insert it into the parent's namespace
module = importlib.import_module(self.__name__)
self._parent_module_globals[self._local_name] = module
# Emit a warning if one was specified
if self._warning:
logging.warning(self._warning)
# Make sure to only warn once.
self._warning = None
# Update this object's dict so that if someone keeps a reference to the
# LazyLoader, lookups are efficient (__getattr__ is only called on lookups
# that fail).
self.__dict__.update(module.__dict__)
return module
def __getattr__(self, item: str) -> Any:
if not self.module:
self.module = self._load()
return getattr(self.module, item)
def __dir__(self) -> List[str]:
if not self.module:
self.module = self._load()
return dir(self.module)