In [1]:
from abc import abstractmethod
from tenzing.core.models import tenzing_model
import pandas as pd
import numpy as np

class ContainerMeta(type):
    """Metaclass that makes a container class display as its bare class name."""

    def __repr__(cls) -> str:
        # __name__ is already a string, so no extra formatting is required.
        return cls.__name__

    
class Container(metaclass=ContainerMeta):
    """Base class for containers, which select a subset of the values of a series.

    Subclasses are expected to implement ``mask`` and ``contains_op``.
    The ``@abstractmethod`` markers only document that intent: the metaclass
    is ``ContainerMeta`` (a plain ``type`` subclass, not ``abc.ABCMeta``), so
    Python does not prevent instantiation when they are missing.
    """

    def __init__(self):
        self.relations = set()

    @abstractmethod
    def mask(self, series):
        """Return a boolean mask over ``series``; ``transform_op`` indexes with it."""
        pass

    @abstractmethod
    def contains_op(self, series) -> bool:
        """Return whether this container applies to ``series`` (backs ``series in container``)."""
        pass

    def transform(self, series):
        """Public entry point; delegates to ``transform_op``."""
        return self.transform_op(series)

    def transform_op(self, series):
        """Return the entries of ``series`` selected by ``mask``."""
        return series[self.mask(series)]

    def __contains__(self, series) -> bool:
        return self.contains_op(series)

    def __add__(self, other):
        """Combine two containers into a ``MultiContainer``.

        Raises
        ------
        TypeError
            If ``other`` is not a ``Container`` instance. ``TypeError`` is a
            subclass of ``Exception``, so handlers written for the previous
            bare ``Exception`` keep working.
        """
        if not isinstance(other, Container):
            raise TypeError(f"{other} must be of type Container")
        return MultiContainer([self, other])

class MultiContainer(Container):
    """Conjunction of two or more containers, as built by ``Container.__add__``.

    Parameters
    ----------
    containers : sequence of Container
        The containers to combine; at least two are required.
    """

    def __init__(self, containers):
        assert len(containers) >= 2
        self.containers = containers
        super().__init__()

    def mask(self, series):
        """Return the element-wise AND of every child container's mask."""
        combined = self.containers[0].mask(series)
        for container in self.containers[1:]:
            # Build a new object instead of `&=` so the mask returned by the
            # first container is never modified in place.
            combined = combined & container.mask(series)
        return combined

    def contains_op(self, series, mask=None) -> bool:
        """Return True when ``series`` is in every child container.

        ``mask`` is accepted for signature compatibility and is not used.
        """
        return all(series in container for container in self.containers)

    def __repr__(self):
        # Leaf containers are shown by class name (via ContainerMeta.__repr__);
        # nested MultiContainers are expanded recursively so their members
        # stay visible instead of printing just "MultiContainer".
        names = [
            repr(container)
            if isinstance(container, MultiContainer)
            else str(container.__class__)
            for container in self.containers
        ]
        return f"({' + '.join(names)})"

class Generic(Container):
    """Container that accepts every series and keeps every element."""

    def mask(self, series):
        """Return an all-True boolean array with one entry per element of ``series``."""
        # Use the builtin `bool`: the `np.bool` alias was deprecated in
        # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        return np.ones((len(series),), dtype=bool)

    def contains_op(self, series):
        return True
    
class Infinite(Container):
    """Container for series that hold infinite values."""

    def mask(self, series):
        """Return True for every element that is not +/-inf."""
        return ~np.isinf(series)

    def contains_op(self, series, mask=None):
        """Return True when ``series`` has at least one infinite value.

        The previous implementation returned ``self.mask(series).any()``,
        i.e. "at least one value is *not* infinite", which is the opposite
        test and disagrees with ``Missing.contains_op`` (``series.hasnans``).
        ``mask`` is accepted for signature compatibility and is not used.
        """
        return bool(np.isinf(series).any())

class Missing(Container):
    """Container for series that hold missing (NaN) values."""

    def mask(self, series):
        """Return True for every element that is not missing."""
        return series.notna()

    def contains_op(self, series, mask=None):
        """Return True when ``series`` has at least one missing value.

        ``mask`` is accepted for signature compatibility and is not used; its
        default is ``None`` rather than a shared mutable list.
        """
        return series.hasnans

# Module-level container instances; later cells pass them to TenzingTypeset.
generic = Generic()
infinite = Infinite()
missing = Missing()
# Adding two containers builds a MultiContainer; as the cell's last
# expression, its repr is what the notebook displays.
generic + infinite

(Generic + Infinite)

In [135]:
class model_relation:
    """Directed relation between two :class:`tenzing.core.models.tenzing_model` implementations.

    A relation links a source type B (``friend_model``) to a target type A
    (``model``) with respect to an underlying data series. It offers two
    operations:

        - **is_relation** decides whether a series of type B can alternatively
          be represented as type A.
        - **transform** converts the series from B -> A.

    For example, the series `pd.Series([1.0, 2.0, 3.0])` is stored as floats
    although every value is an integer.

    >>> x = pd.Series([1.0, 2.0, 3.0])
    >>> relation = model_relation(tenzing_integer, tenzing_float)
    >>> relation.is_relation(x)
    True

    >>> relation.transform(x)
    pd.Series([1, 2, 3])

    Parameters
    ----------
    model : tenzing_type
        The type this relation transforms a series into.

    friend_model : tenzing_type
        The type this relation transforms a series from.

    relationship : func, optional
        Predicate deciding whether a series of type ``friend_model`` can be
        converted to ``model``. When omitted (or falsy), ``model.__contains__``
        is used.

    transformer : func, optional
        Conversion from ``friend_model`` to ``model``; handed to ``model.cast``
        as its operation.

    """

    def __init__(self, model, friend_model, relationship=None, transformer=None):
        self.model = model
        self.friend_model = friend_model
        # Edge direction is source -> target.
        self.edge = (friend_model, model)
        # Fall back to the target model's membership test when no predicate is given.
        self.relationship = relationship or self.model.__contains__
        self.transformer = transformer

    def is_relation(self, obj) -> bool:
        """Apply the relationship predicate to ``obj``."""
        predicate = self.relationship
        return predicate(obj)

    def transform(self, obj):
        """Cast ``obj`` through the target model, using ``transformer`` as the operation."""
        target = self.model
        return target.cast(obj, self.transformer)

    def __repr__(self) -> str:
        return f"({self.friend_model} -> {self.model})"


class meta_model(type):
    """Metaclass giving model classes ``in``, ``repr`` and ``+`` behaviour at class level."""

    def __contains__(cls, series) -> bool:
        # `series in SomeModel` is answered by the model's own contains_op.
        return cls.contains_op(series)

    def __repr__(cls) -> str:
        return cls.__name__

    def __add__(cls, other):
        """Build a ``CompoundType`` from this class and a sub type.

        Raises ``ValueError`` when ``other`` is not a subclass of ``subType``.
        """
        # Imported inside the method, as before — presumably to avoid a
        # circular import; confirm against the package layout.
        from tenzing.core.model.compound_type import CompoundType
        from tenzing.core.model.sub_type import subType

        if issubclass(other, subType):
            return CompoundType(cls, [other])
        raise ValueError("Only Sub types can be added to Compound types.")

    # TODO: raise exception on instantiation
    #     raise Exception("Cannot instantiate a type!")

    # TODO: automatic static ?
    # https://stackoverflow.com/questions/31953113/purely-static-classes-in-python-use-metaclass-class-decorator-or-something-e


class tenzing_model(metaclass=meta_model):
    """Abstract base for a tenzing type.

    Offers a common API for custom tenzing datatypes, which can optionally be
    extended with mixins from :mod:`tenzing.core.mixins`

    e.g.

    >>> class tenzing_datetime(tenzing_model):
    >>>     def contains_op(self, series):
    >>>         return pdt.is_datetime64_dtype(series)
    >>>
    >>>     def cast_op(self, series):
    >>>         return pd.to_datetime(series)
    >>>
    """

    # One registry shared by every subclass, keyed first by class name and
    # then by the relation's friend_model.
    _relations = {}

    @classmethod
    def __instancecheck__(mcs, instance) -> bool:
        # NOTE(review): `isinstance(x, cls)` looks this hook up on the
        # metaclass, so this class-level definition is presumably only reached
        # when called explicitly — confirm whether it belongs on meta_model.
        return instance.__class__ is mcs or isinstance(instance.__class__, mcs)

    @classmethod
    def get_relations(cls) -> dict:
        """Return this class's relation mapping, creating an empty one on first use."""
        # TODO: move to __new__ or so?
        return cls._relations.setdefault(cls.__name__, {})

    @classmethod
    def register_relation(cls, relation) -> None:
        """Store ``relation`` under its ``friend_model``; asserts it is not already registered."""
        registered = cls._relations.setdefault(cls.__name__, {})

        assert (
            relation.friend_model not in registered
        ), "Only one relationship permitted per type"
        registered[relation.friend_model] = relation

    @classmethod
    def cast(cls, series, operation=None):
        """Apply ``operation`` to ``series``, defaulting to ``cls.cast_op``."""
        if operation is None:
            operation = cls.cast_op
        return operation(series)

    @classmethod
    @abstractmethod
    def contains_op(cls, series) -> bool:
        """Membership test used by ``series in cls``; implemented by subclasses."""
        pass

    @classmethod
    @abstractmethod
    def cast_op(cls, series):
        """Default conversion used by ``cast``; implemented by subclasses."""
        pass

In [136]:
from tenzing.core.model_implementations import *

In [137]:
from tenzing.core.typesets import *
import networkx as nx

def detect_series_container(series, containers):
    """Return the container describing ``series``.

    Parameters
    ----------
    series
        The series to test.
    containers
        Candidate containers; each must support ``series in container``.

    Returns
    -------
    The single matching container when exactly one matches, otherwise a
    ``MultiContainer`` of all matching containers.
    """
    matching = [container for container in containers if series in container]
    # MultiContainer requires at least two members, so a lone match is
    # returned directly instead of tripping that assertion.
    if len(matching) == 1:
        return matching[0]
    return MultiContainer(matching)
    
class TenzingTypeset(object):
    """
    A collection of tenzing_types with an associated relationship map between them.

    Parameters
    ----------
    containers
        The containers each series is tested against.
    types: list
        The tenzing types to include; ``tenzing_generic`` is always added.

    Attributes
    ----------
    types: set
        The supplied types plus ``tenzing_generic``.
    containers
        The containers passed to the constructor.
    relation_graph
        The result of ``build_relation_graph(self.types)``.
    column_container_map: dict
        Detected container per series name.
    column_type_map: dict
        Starts empty; not populated by the methods of this class.
    """

    def __init__(self, containers, types: list):
        self.types = set(types) | {tenzing_generic}
        self.containers = containers

        self.relation_graph = build_relation_graph(self.types)
        self.column_container_map = {}
        self.column_type_map = {}

    def detect_series_container(self, series):
        """Detect the container for ``series``, remember it by series name, and return it."""
        # Record the result in the container map; it was previously written to
        # column_type_map, leaving column_container_map permanently empty.
        container = detect_series_container(series, self.containers)
        self.column_container_map[series.name] = container
        return container

    def get_containerized_series(self, series):
        """Return ``series`` after the detected container's ``transform`` has been applied."""
        container = self.detect_series_container(series)
        return container.transform(series)

    def plot(self):
        """Draw the relation graph with networkx's Kamada-Kawai layout, with node labels."""
        nx.draw_kamada_kawai(self.relation_graph, with_labels=True)
        
# Build a typeset from the three container instances and two tenzing types.
ts = TenzingTypeset([generic, missing, infinite],[tenzing_integer, tenzing_float])

# Sample series holding both a NaN and an infinity; the cell displays what
# remains after the detected containers have filtered it.
s = pd.Series([1.0, 2.0, np.nan, np.inf])
ts.get_containerized_series(s)

0    1.0
1    2.0
dtype: float64

In [138]:
class Type():
    """Pairing of a container with a base type.

    Parameters
    ----------
    container
        Object providing ``mask(series)`` and supporting ``series in container``.
    base_type
        Object providing ``cast_op(series)`` and supporting ``series in base_type``.
    """

    def __init__(self, container, base_type):
        self.container = container
        self.base_type = base_type

    def contains_op(self, series):
        """Return True only when ``series`` is in both the container and the base type."""
        return series in self.container and series in self.base_type

    def transform(self, series):
        """Return a copy of ``series`` with the masked entries cast by the base type.

        The caller's ``series`` is left untouched: the cast values are written
        into a copy, through ``.loc`` so the assignment targets that copy
        explicitly.
        """
        container_mask = self.container.mask(series)
        result = series.copy()
        result.loc[container_mask] = self.base_type.cast_op(series[container_mask])
        return result

    def __repr__(self):
        return f"{self.container}[{self.base_type}]"

        
        
            
# Pair a combined Infinite + Missing container with a base type; the cell
# displays the resulting Type's repr.
Type(Infinite() + Missing(), tenzing_float)

(Infinite + Missing)[tenzing_float]

In [139]:
# Combined container: its mask is the AND of the Infinite and Missing masks,
# so only values that are neither infinite nor NaN are marked True.
mi = Infinite() + Missing()
s = pd.Series([1.0, np.nan, np.inf])
mi.mask(s)

0     True
1    False
2    False
dtype: bool