/
categorical.py
72 lines (60 loc) · 2.15 KB
/
categorical.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from typing import List, Sequence
import numpy as np
import pandas as pd
from pandas.api import types as pdt
from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
from visions.types import Integer, Object, String, VisionsBaseType
from visions.utils.coercion.test_utils import coercion_map, coercion_map_test
def to_category(series: pd.Series) -> pd.Series:
    """Cast a series of boolean-like values to a boolean dtype.

    Membership is tested with ``Series.isin``. When every value matches
    ``True``/``False`` the series is cast with ``astype(bool)``. When the
    values additionally include ``None``/``NaN`` it is cast with
    ``astype("Bool")`` instead.

    Args:
        series: The series to convert.

    Returns:
        The converted series.

    Raises:
        ValueError: If any value falls outside ``True``, ``False``, ``None``
            and ``NaN``; the message lists the unique offending values.
    """
    # Guard 1: nothing but True/False, so the plain bool dtype is sufficient.
    if series.isin({True, False}).all():
        return series.astype(bool)

    # Guard 2: True/False plus missing values.
    # NOTE(review): "Bool" is not a stock pandas dtype alias; presumably it is
    # an extension dtype registered elsewhere in the project - confirm.
    supported = series.isin({True, False, None, np.nan})
    if supported.all():
        return series.astype("Bool")

    # Anything left over cannot be represented; report the distinct offenders.
    unsupported_values = series[~supported].unique()
    message = "Values not supported {unsupported_values}".format(
        unsupported_values=unsupported_values
    )
    raise ValueError(message)
def _get_relations(cls) -> List[TypeRelation]:
    """Build the list of type relations for ``cls``.

    The returned list contains, in order:

    1. an identity relation from ``Generic``;
    2. an inference relation from ``String``, applied to the lower-cased
       strings and driven by ``cls.string_coercions``;
    3. an inference relation from ``Integer``, valid when every value is in
       ``{0, 1, NaN}``;
    4. an inference relation from ``Object``, valid when every element is
       either ``None`` or a ``bool``.

    Args:
        cls: The type the relations are attached to. The string relation reads
            ``cls.string_coercions`` each time it is evaluated.

    Returns:
        The relations described above.
    """
    # Imported inside the function rather than at module level
    # (presumably to avoid a circular import - confirm).
    from visions.types import Generic

    def string_is_coercible(s):
        # The coercion table is looked up at call time, not at definition time.
        matches = coercion_map_test(cls.string_coercions)
        return matches(s.str.lower())

    def string_to_category(s):
        mapper = coercion_map(cls.string_coercions)
        return to_category(mapper(s.str.lower()))

    def integer_is_binary(s):
        return s.isin({0, 1, np.nan}).all()

    def object_is_boolean(s):
        element_types = s.apply(type)
        return element_types.isin([type(None), bool]).all()

    from_generic = IdentityRelation(cls, Generic)
    from_string = InferenceRelation(
        cls,
        String,
        relationship=string_is_coercible,
        transformer=string_to_category,
    )
    from_integer = InferenceRelation(
        cls,
        Integer,
        relationship=integer_is_binary,
        transformer=to_category,
    )
    from_object = InferenceRelation(
        cls,
        Object,
        relationship=object_is_boolean,
        transformer=to_category,
    )
    return [from_generic, from_string, from_integer, from_object]
class Category(VisionsBaseType):
    """**Categorical** implementation of :class:`visions.types.type.VisionsBaseType`.

    Examples:
        >>> x = pd.Series([True, False, 1], dtype='category')
        >>> x in visions.Category
        True
    """

    @classmethod
    def get_relations(cls) -> Sequence[TypeRelation]:
        """Return the relations produced by ``_get_relations`` for this type."""
        return _get_relations(cls)

    @classmethod
    def contains_op(cls, series: pd.Series) -> bool:
        """Report whether ``series`` has a categorical or a boolean dtype.

        Args:
            series: The series to inspect.

        Returns:
            ``True`` when the dtype is a ``CategoricalDtype`` or when
            ``is_bool_dtype`` accepts the series, ``False`` otherwise.
        """
        # ``is_categorical_dtype`` is deprecated in pandas; the documented
        # replacement is an isinstance check against ``CategoricalDtype``.
        # Fall back to the argument itself so a bare dtype object still works.
        dtype = getattr(series, "dtype", series)
        if isinstance(dtype, pdt.CategoricalDtype):
            return True
        return pdt.is_bool_dtype(series)