A schema is clearly a named, ordered, collection of fields

So.... what is a field?

In Pemi, fields could more specifically be called "field converters".
  - we use them to convert a data element from one format to another (e.g., csv->Pandas, Pandas->Postgres, Spark->CSV, etc)
  - and we use them to build dummy data in the tests
  - and we use them to validate that the data being read is compatible with what is expected
  - But I'm not sure coercion should be wrapped up with validation.
  





In [39]:
import datetime

class CoercionError(ValueError): pass

try:
    datetime.datetime.strptime('2017-31-31', '%Y-%m-%d')
except ValueError as err:
    print(err.__class__.__name__)
    raise CoercionError('{}: {}'.format(err.__class__.__name__, err))


ValueError


CoercionError: ValueError: time data '2017-31-31' does not match format '%Y-%m-%d'

In [48]:
class CoercionError(ValueError): pass
    
    
def convert_exception(fun):
    def wrapper(self, value):
        try:
            coerced = fun(self, value)
        except Exception as err:
            raise CoercionError('Unable to coerce value "{}" to {}: {}: {}'.format(value, self.__class__.__name__, err.__class__.__name__, err))
        return coerced
    return wrapper


class Field():
    def __init__(self, name=None, validators=[], **metadata):
        self.name = name
        self.metadata = metadata
        
        default_metadata = {'null': None}
        self.metadata = {**default_metadata, **metadata}
        self.null = self.metadata['null']
        
    @convert_exception
    def coerce(self, value):
        raise NotImplementedError

    def __str__(self):
        return self.__dict__.__str__()
        
        
class StringField(Field):
    def __init__(self, name=None, validators=[], **metadata):
        super().__init__(name=name, validators=validators, null='', **metadata)
    
    @convert_exception
    def coerce(self, value):
        if not value:
            return self.null
        else:
            return str(value)


class DecimalField(Field):
    def __init__(self, name=None, validators=[], **metadata):
        super().__init__(name=name, validators=validators, **metadata)
        self.precision = self.metadata['precision']
        self.scale = self.metadata['scale']
    
    @convert_exception
    def coerce(self, value):
        if not value:
            return self.null
        else:
            dec = decimal.Decimal(str(value))
            return dec
        
        
        
# my_schema = Schema(**{
#     'id': IntegerField(allow_null=False, is_awesome=True),
#     'name': StringField(),
#     'price': DecimalField(precision=6, scale=2)
# })

# Schema(
#     IntegerField('id', allow_null=False, is_awesome=True),
#     StringField('name'),
#     DecimalField('price', precision=6, scale=2)
# )

# Schema(
#     id=IntegerField(allow_null=False, is_awesome=True),
#     name=StringField(),
#     price=DecimalField(precision=6, scale=2)
# )

#s = StringField(name = 'myname')
#s.in_coerce(None)

#s2 = StringField(name = 'myname', allow_null=False, validators=[NullValidator])
#s2 = StringField(name = 'myname', allow_null=False)
#s2.in_coerce(None)


import decimal
# import datetime

d1 = DecimalField('d1', precision=8, scale=2, enforce_decimal=True, validators=[DecimalValidator])
print(d1.coerce('8..25'))

# try:
#     d1.in_coerce('8.253')
# except DecimalValidationError as err:
#     print(err)

# print(d1.in_coerce(None))
# print(d1.in_coerce(''))
# print(d1.in_coerce(float('NaN')).__class__)




CoercionError: Unable to coerce value "8..25" to DecimalField: InvalidOperation: [<class 'decimal.ConversionSyntax'>]

In [23]:
# SCRAP THIS.... I don't want to mix field coercion with validation
class CoercionError(ValueError): pass
class RequiredValueError(ValueError): pass
class DecimalValidationError(ValueError):pass

# For a validator, I would need to specify a validator name, a value, and a function to run if the value matches some criteria
class FieldValidator():
    meta_name = 'metadata_key'
    meta_value = 'metadata_value'
    
    def __init__(self, **metadata):
        self.metadata = metadata
        
    def validate(self, value):
        raise NotImplementedError
        return True
        
    def validator(self):
        if self.metadata[self.__class__.meta_name] == self.__class__.meta_value:
            return validate
        else:
            return None

class NullValidator(FieldValidator):
    meta_name = 'allow_null'
    meta_value = False
    
    def __init__(self, **metadata):
        super().__init__(**metadata)
        self.null = self.metadata.get('null', None)

    def validate(self, value):
        if value == self.null:
            raise RequiredValueError('Expecting non-null, but "{}" found instead'.format(value))
        return True
        
    
class DecimalValidator(FieldValidator):
    meta_name = 'enforce_decimal'
    meta_value = True
    
    def __init__(self, **metadata):
        super().__init__(**metadata)
        self.precision = self.metadata['precision']
        self.scale = self.metadata['scale']

    def validate(self, value):
        if not value or value != value:
            return value

        detected_precision = len(value.as_tuple().digits)
        detected_scale = -value.as_tuple().exponent

        print('detected_precision: {}'.format(detected_precision))
        print('detected_scale: {}'.format(detected_scale))
        
        if detected_precision > self.precision:
            raise DecimalValidationError('Decimal conversion error for "{}".  Expected precision: {}, Actual precision: {}'.format(
                value, self.precision, detected_precision
            ))
        if detected_scale > self.scale:
            raise DecimalValidationError('Decimal conversion error for "{}".  Expected scale: {}, Actual scale: {}'.format(
                value, self.scale, detected_scale
            ))
        return True
    
    
def validate(in_coerce):
    def wrapper(self, value):
        coerced = in_coerce(self, value)
        [v.validate(coerced) for v in self.validators]
        return coerced
    return wrapper
    
class Field():
    def __init__(self, name=None, validators=[], **metadata):
        self.name = name
        self.metadata = metadata
        
        default_metadata = {'null': None, 'allow_null': True}
        self.metadata = {**default_metadata, **metadata}
  
        validators = validators + [NullValidator, DecimalValidator]
        self.validators = [v(**metadata) for v in validators if metadata.get(v.meta_name) == v.meta_value]
        self.null = self.metadata['null']
        

#     def config(self):
#         raise NotImplementedError
        
        
    @validate
    def in_coerce(self, value):
        raise NotImplementedError

    def __str__(self):
        return self.__dict__.__str__()
        
        
class StringField(Field):
    def __init__(self, name=None, validators=[], **metadata):
        super().__init__(name=name, validators=validators, null='', **metadata)
#        self.null = ''
#        self.metadata['null'] = self.null
    
    @validate
    def in_coerce(self, value):
        if not value:
            return self.null
        else:
            return str(value)


class DecimalField(Field):
    def __init__(self, name=None, validators=[], **metadata):
        super().__init__(name=name, validators=validators, **metadata)
        self.precision = self.metadata['precision']
        self.scale = self.metadata['scale']
    
    @validate
    def in_coerce(self, value):
        print('converting {} to decimal'.format(value))
        if not value:
            return self.null
        else:
            dec = decimal.Decimal(str(value))
            return dec
        
        
        
# my_schema = Schema(**{
#     'id': IntegerField(allow_null=False, is_awesome=True),
#     'name': StringField(),
#     'price': DecimalField(precision=6, scale=2)
# })

# Schema(
#     IntegerField('id', allow_null=False, is_awesome=True),
#     StringField('name'),
#     DecimalField('price', precision=6, scale=2)
# )

# Schema(
#     id=IntegerField(allow_null=False, is_awesome=True),
#     name=StringField(),
#     price=DecimalField(precision=6, scale=2)
# )

#s = StringField(name = 'myname')
#s.in_coerce(None)

#s2 = StringField(name = 'myname', allow_null=False, validators=[NullValidator])
s2 = StringField(name = 'myname', allow_null=False)
s2.in_coerce(None)


# import decimal
# import datetime

# d1 = DecimalField('d1', precision=8, scale=2, enforce_decimal=True, validators=[DecimalValidator])
# print(d1.in_coerce('8.25'))

# try:
#     d1.in_coerce('8.253')
# except DecimalValidationError as err:
#     print(err)

# print(d1.in_coerce(None))
# print(d1.in_coerce(''))
# print(d1.in_coerce(float('NaN')).__class__)




RequiredValueError: Expecting non-null, but "" found instead

False

In [22]:
def init(schema=None, *args, **kwargs):
    print('schema: {}'.format(schema))
    print('args: {}'.format(args))
    print('kwargs: {}'.format(kwargs))
    
init('happy', 'pants')

schema: happy
args: ('pants',)
kwargs: {}


In [9]:
nan

NameError: name 'nan' is not defined

In [10]:
x = float('NaN')
y = float('NaN')
x != x
                      

True

In [20]:
class Animal():
    pass

class Mammal(Animal):
    pass

class Monkey(Mammal):
    pass

class Arachnid(Animal):
    pass

class BlackWidow(Arachnid):
    pass


m = Monkey()
s = BlackWidow()

isinstance(m, Mammal)


True

In [28]:
class Animal():
    def woof(self):
        print('Woof')
    def bark(self):
        return self.woof
    
a = Animal()
a.bark()

<bound method Animal.woof of <__main__.Animal object at 0x7ff8bc2cb390>>

In [4]:
class validator():
    def __init__(self, func):
        self.func = func
        
    def __call__(self, *args):
        print('Called {func} with args: {args}'.format(func=self.func.__name__,args=args))
        return self.func(*args)
    
class Moofield():
    @validator
    def coerce(self):
        print('I am coercing stuff')

Moofield().coerce()
        

Called coerce with args: {}


TypeError: coerce() missing 1 required positional argument: 'self'

In [1]:
class Moof(): 
    @classmethod
    def validate(cls, pie):
        def wrapper(self):
            pie(self)
            print('I am validerting stuffs')
        return wrapper

class Moof():
    def __init__(self):
        pass
    
#    @Moof.validate
    def coerce(self):
        print('I am corcing stuffs')


print(Moof.__dict__)
        
class Blerf(Moof): pass

class Blerf(Moof):
    @Moof.validate
    def coerce(self):
        print('I am coercing blerfy stuffs')
        
Moof().coerce()
print('--')
#Moof.__dict__
#Blerf().coerce()

{'__module__': '__main__', '__init__': <function Moof.__init__ at 0x7fcbed701b70>, 'coerce': <function Moof.coerce at 0x7fcbed701bf8>, '__dict__': <attribute '__dict__' of 'Moof' objects>, '__weakref__': <attribute '__weakref__' of 'Moof' objects>, '__doc__': None}


AttributeError: type object 'Moof' has no attribute 'validate'

In [7]:
def validate(fun):
    def wrapper(self):
        fun(self)
        print('I am validerting {} stuffs'.format(self.name))
    return wrapper


class Moof():
    def __init__(self):
        self.name = 'moofy'
    
    @validate
    def coerce(self):
        print('I am coercing moofy stuffs')

class Blerf(Moof):
    def __init__(self):
        self.name = 'blerfy'
    
    @validate
    def coerce(self):
        print('I am coercing blerfy stuffs')
        
Moof().coerce()
print('--')
Blerf().coerce()

I am coercing moofy stuffs
I am validerting moofy stuffs
--
I am coercing blerfy stuffs
I am validerting blerfy stuffs


In [14]:
def validate(self, value):
    print('Validating {} for {}'.format(value, self))
    return value

def null_handler(self, null_if, value):
    print('Checking if {} for {} is null'.format(value, self))
    if null_if(value):
        return 'NULL I suppose'
    else:
        return value
    
def coercion_handler(null_if=lambda v: not v):
    def coerce_wrapper(fun):
        def wrapper(self, value):
            value = null_handler(self, null_if, value)
            coerced = fun(self, value)
            validate(self, coerced)
            return coerced
        return wrapper
    return coerce_wrapper
    
class Moofle():
    @coercion_handler(null_if=lambda v: v == 'NA')
    def coerce(self, value):
        print('Coercing value {}'.format(value))
        return value

Moofle().coerce('NA')
    

Checking if NA for <__main__.Moofle object at 0x7f3f1c646208> is null
Coercing value NULL I suppose
Validating NULL I suppose for <__main__.Moofle object at 0x7f3f1c646208>


'NULL I suppose'

In [51]:
import datetime

str(datetime.datetime.strptime('2017-01-31', '%Y-%m-%d'))


'2017-01-31 00:00:00'

In [53]:
import dateutil


datetime.datetime(2017, 1, 31, 0, 0)

In [64]:
#%%timeit
d = dateutil.parser.parse('01-31-2017')
dateutil.parser.parse(d)

TypeError: Parser must be a string or character stream, not datetime

In [55]:
%%timeit
datetime.datetime.strptime('2017-01-31', '%Y-%m-%d')

9.1 µs ± 233 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [23]:
import decimal
d = decimal.Decimal('103.155')
#d.quantize(10.exp(d), rounding=decimal.ROUND_HALF_EVEN)
round(d, 2)


Decimal('103.16')

In [33]:
g = (x for x in ['one', 'two', 'three'])
f = lambda: next((x for x in ['one', 'two', 'three']))
f()
f()

'one'

In [34]:
next(['one', 'two', 'three'])

TypeError: 'list' object is not an iterator