In [1]:
from beakerx import *
# NOTE(review): presumably switches pandas DataFrame rendering in this
# notebook to BeakerX's table display -- confirm against the BeakerX docs.
beakerx.pandas_display_default()

In [11]:
import pandas as pd
from pandas_schema import Schema, Column
from pandas_schema.validation import CanConvertValidation, InRangeValidation


class ExecutionResult:
    """Mutable record describing the outcome of a check.

    A fresh instance starts out successful: ``result`` is ``True``,
    ``errors`` is an empty list and ``message`` is ``None``. Callers update
    these fields as they discover problems.
    """

    def __init__(self):
        # The list literal is evaluated per call, so every instance owns its
        # own ``errors`` list.
        self.result, self.errors, self.message = True, [], None


class Adapter(object):
    """Base class for data sources that can populate a model.

    Concrete adapters override :meth:`load`; :meth:`validate_fields_map` is a
    shared helper for checking a source-to-model field mapping.
    """

    @staticmethod
    def validate_fields_map(model_fields: list, fields_map: dict):
        """Check that every required model field is the target of a mapping.

        Args:
            model_fields: Names of the model fields that must be mapped.
            fields_map: Mapping whose *values* are model field names.

        Returns:
            An ``ExecutionResult``. ``errors`` contains one message per
            field in ``model_fields`` that is missing from
            ``fields_map.values()``; ``result`` is ``False`` when at least
            one such message exists and ``True`` otherwise.
        """
        v_result = ExecutionResult()
        # Look the values view up once instead of on every iteration.
        mapped_targets = fields_map.values()
        v_result.errors.extend(
            'fields_map does not contain mapping to required field "{}"'.format(field)
            for field in model_fields
            if field not in mapped_targets
        )
        if v_result.errors:
            v_result.result = False
        return v_result

    def load(self, model: "Model" = None):
        """Populate ``model`` from this adapter's source.

        The base implementation only signals that a subclass must provide
        the behaviour. It accepts the same ``model`` argument that
        ``Model.load_from_adapter`` passes, so calling an adapter that did
        not override ``load`` fails with ``NotImplementedError`` rather than
        with a ``TypeError`` about an unexpected argument.

        Args:
            model: The model to fill. Unused here; optional so that existing
                zero-argument calls keep working.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError('Please implement load method in adapter implementation')


class Model(object):
    """Base class for a tabular model: loaded data plus a validation schema.

    Subclasses are expected to assign ``schema`` (an object exposing a
    ``columns`` iterable); ``data`` is filled in by an adapter.
    """

    def __init__(self):
        self.data = None    # set by the adapter during load
        self.schema = None  # set by subclasses

    @property
    def required_model_fields(self):
        """Return the names of schema columns whose ``allow_empty`` is falsy."""
        return [column.name for column in self.schema.columns if not column.allow_empty]

    def load_from_adapter(self, adapter: "Adapter"):
        """Load this model through ``adapter`` and print the outcome.

        Calls ``adapter.load(self)`` and prints either a success line or each
        reported error on its own line.

        Args:
            adapter: Object with a ``load(model)`` method that returns the
                validation errors (or nothing).
        """
        errors = adapter.load(self)
        # ExcelAdapter.load returns the result of schema.validate(), which
        # reports a clean run as an empty list rather than None, so treat any
        # empty result as success.
        if not errors:
            print("Import done, no errors")
            return
        for error in errors:
            print(error)


class ExcelAdapter(Adapter):
    """Adapter that fills a model from an Excel workbook.

    Args:
        filename: Path of the workbook handed to ``pandas.read_excel``.
        fields_map: Mapping from spreadsheet column header to model field
            name; used to rename columns after reading.
    """

    def __init__(self, filename, fields_map):
        self.filename = filename
        self.fields_map = fields_map

    def load(self, model: Model):
        """Read the workbook into ``model.data`` and validate it.

        The sheet is read, its columns are renamed through ``fields_map``,
        and every column that is not a mapping target is dropped before the
        model's schema validates the frame.

        Args:
            model: Model to populate; its ``required_model_fields`` and
                ``schema`` are used.

        Returns:
            Whatever ``model.schema.validate`` returns for the loaded frame.

        Raises:
            ValueError: If ``fields_map`` lacks a mapping for a required
                model field. The message lists every missing field, since
                nothing else records them.
        """
        check = Adapter.validate_fields_map(model.required_model_fields, self.fields_map)
        if not check.result:
            raise ValueError(
                "Fields validation failed: {}".format("; ".join(check.errors))
            )

        frame = pd.read_excel(self.filename).rename(columns=self.fields_map)
        mapped_targets = set(self.fields_map.values())
        # Keep only columns the mapping produced; the schema expects exactly those.
        unmapped_columns = [name for name in frame.columns if name not in mapped_targets]
        model.data = frame.drop(unmapped_columns, axis=1)
        return model.schema.validate(model.data)


class DogModel(Model):
    """Model for dog records with ``name``, ``age`` and ``isAlive`` columns."""

    def __init__(self):
        super().__init__()
        name_column = Column(name='name')
        # Age is range-checked against 1..15 and must be convertible to int.
        age_column = Column(
            name='age',
            validations=[InRangeValidation(1, 15), CanConvertValidation(int)],
        )
        # NOTE(review): bool() accepts almost any value, so this check
        # presumably never rejects anything -- confirm whether an explicit
        # list of allowed values was intended.
        alive_column = Column(name='isAlive', validations=[CanConvertValidation(bool)])
        self.schema = Schema([name_column, age_column, alive_column])

In [12]:
# Spreadsheet column header -> DogModel field name.
mmp = {
    'calledas': 'name',
    'oldness': 'age',
    'active?': 'isAlive',
}
adptr = ExcelAdapter('dogs.xlsx', mmp)
d = DogModel()
# Reads the workbook into d.data and prints any validation errors.
d.load_from_adapter(adptr)

{row: 2, column: "age"}: "0" was not in the range [1, 15)


In [13]:
# Show the frame the adapter stored on the model (renamed, unmapped columns dropped).
d.data

Unnamed: 0,name,age,isAlive
0,Zeep,4,yes
1,Kiwi,7,yes
2,Nil,0,yes


In [14]:
# Names of the schema columns whose allow_empty flag is falsy.
d.required_model_fields

['name', 'age', 'isAlive']