In [46]:
import yaml
import json
from enum import Enum
from pydantic import (
    BaseModel,
    ValidationError,
    field_validator,
    field_serializer,
    model_validator,
    computed_field,
    ValidatorFunctionWrapHandler,
    ValidationInfo,
    Field,
    ConfigDict,
)

In [55]:
# Location of the YAML configuration file. The path is relative, so it is
# resolved against the kernel's current working directory.
file_path = "mrm.yaml"

In [56]:
# Parse the YAML configuration into plain Python objects.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
# `safe_load` builds only standard YAML types (no arbitrary Python objects).
with open(file_path, encoding="utf-8") as f:
    config = yaml.safe_load(f)

In [57]:
# Show the parsed configuration to inspect its structure before validating it.
config

{'reference': {'database': 'workspace_db',
  'table': 'tb_spec_dataset',
  'train_data': {'start': 202001, 'end': 202212}},
 'metrics': {'keys': {'names': ['pk_1', 'pk_2'],
   'together': ['duplicidade'],
   'individual': {'pk_1': ['ausencia'], 'pk_2': ['ausencia']}},
  'features': {'numerigcal': {'col_a': ['media', 'moda', 'variancia'],
    'col_b': ['media'],
    'col_c': ['moda']},
   'categorical': {'col_d': ['ausencia']}},
  'target': 'target_name'}}

In [58]:
class ColumnMetrics(Enum):
    """Metric identifiers that may be requested for a column.

    Each member's value is the lowercase string used to name that metric.
    Fields annotated with this enum only accept one of these values.
    """

    AUSENCIA = "ausencia"  # Portuguese: "absence"
    MEDIA = "media"  # Portuguese: "mean"
    MEDIANA = "mediana"  # Portuguese: "median"
    MODA = "moda"  # Portuguese: "mode"
    VARIANCIA = "variancia"  # Portuguese: "variance"
    DESVIO_PADRAO = "desvio_padrao"  # Portuguese: "standard deviation"

In [59]:
class ReferenceTrainData(BaseModel):
    """Start and end markers of the training-data window."""

    # NOTE(review): the sample configuration uses 202001 and 202212, which look
    # like YYYYMM periods -- confirm. Nothing here checks the format or that
    # start <= end.
    start: int
    end: int


class SettingsReference(BaseModel):
    """Schema of the top-level ``reference`` section of the configuration."""

    # Name of the database, as a plain string.
    database: str
    # Name of the table, as a plain string.
    table: str
    # Nested ``train_data`` mapping with its ``start`` and ``end`` values.
    train_data: ReferenceTrainData


class MetricsKeys(BaseModel):
    """Schema of the ``metrics.keys`` section of the configuration.

    The list element types are not constrained, so any list content is accepted.
    """

    # In the sample configuration: the key column names, e.g. ['pk_1', 'pk_2'].
    names: list
    # In the sample configuration: ['duplicidade'] -- presumably metrics
    # evaluated over all key columns combined; confirm. Note that 'duplicidade'
    # is not a ColumnMetrics member.
    together: list
    # Maps a name to a list; in the sample configuration each key column maps
    # to ['ausencia'].
    individual: dict[str, list]


class MetricsFeature(BaseModel):
    """Schema of the ``metrics.features`` section of the configuration.

    Both fields map a column name to a list of ColumnMetrics members; a metric
    name that is not a ColumnMetrics value fails validation.
    """

    # NOTE(review): "numerigcal" looks like a typo of "numerical". It matches
    # the key used in the sample configuration, so renaming it requires changing
    # the YAML file and every attribute access at the same time.
    numerigcal: dict[str, list[ColumnMetrics]]
    categorical: dict[str, list[ColumnMetrics]]


class SettingsMetrics(BaseModel):
    """Schema of the top-level ``metrics`` section of the configuration."""

    # Nested ``keys`` mapping.
    keys: MetricsKeys
    # Nested ``features`` mapping of per-column metric lists.
    features: MetricsFeature
    # A single string; 'target_name' in the sample configuration.
    target: str


class Settings(BaseModel):
    """Root schema of the configuration: ``reference`` and ``metrics`` sections."""

    reference: SettingsReference
    metrics: SettingsMetrics

In [60]:
# Validate the parsed configuration against the Settings schema.
try:
    settings = Settings.model_validate(config)
except ValidationError as validation_error:
    # On a schema violation, keep the notebook running: leave `settings` as
    # None and show pydantic's error report instead of raising.
    settings = None
    print(validation_error)

# Display the validated model (or None when validation failed).
settings

Settings(reference=SettingsReference(database='workspace_db', table='tb_spec_dataset', train_data=ReferenceTrainData(start=202001, end=202212)), metrics=SettingsMetrics(keys=MetricsKeys(names=['pk_1', 'pk_2'], together=['duplicidade'], individual={'pk_1': ['ausencia'], 'pk_2': ['ausencia']}), features=MetricsFeature(numerigcal={'col_a': [<ColumnMetrics.MEDIA: 'media'>, <ColumnMetrics.MODA: 'moda'>, <ColumnMetrics.VARIANCIA: 'variancia'>], 'col_b': [<ColumnMetrics.MEDIA: 'media'>], 'col_c': [<ColumnMetrics.MODA: 'moda'>]}, categorical={'col_d': [<ColumnMetrics.AUSENCIA: 'ausencia'>]}), target='target_name'))

In [61]:
# Read the `end` value of the training-data window by attribute access.
# This raises AttributeError if validation failed and `settings` is None.
settings.reference.train_data.end

202212

In [62]:
# Metrics configured for column 'col_a'; validation has already converted the
# strings into ColumnMetrics members.
settings.metrics.features.numerigcal['col_a']

[<ColumnMetrics.MEDIA: 'media'>,
 <ColumnMetrics.MODA: 'moda'>,
 <ColumnMetrics.VARIANCIA: 'variancia'>]