In [None]:
import datetime
import pathlib
import pandas as pd
import pandas.testing as tm
import pydantic
from pydantic import ValidationError
import numpy as np

from waipawama import domain_accounting, utils

## Learning
- Problems will arise with type conversions
- np.nan and None are especially hard to handle
- How can we make it easier?

## Good things:
- pydantic has more datatypes -> better for the domain
- pydantic models can be rich: they can carry more than just data definitions
- \+ the stuff we already know

Be smart about what we don't do.

## First Solution for np.nan:
You need a smart domain driven nan filling strategy:
- floats -> np.nan (no change)
- categorical ints -> 0 (maybe)
- strings -> None
- dates -> None

Or you make your own pydantic data types

### Import domain model and template

In [None]:
# Pydantic data model: alias for the Accounting class exposed by
# domain_accounting, so later cells can refer to it by a short name.
model_accounting = domain_accounting.Accounting

In [None]:
# Dump the model's JSON schema to a file in the current working directory.
# by_alias=False is passed explicitly to schema_json for this export.
# NOTE(review): the file name reads 'exmple' - possibly a typo for 'example';
# left unchanged because other code may rely on this exact path.
p = pathlib.Path('./exmple_schema.json')
schema_text = domain_accounting.Accounting.schema_json(
    by_alias=False,
    indent=2,
)
p.write_text(schema_text)

In [None]:
# Show the JSON schema inline as the cell output; no by_alias argument is
# passed here, so schema_json uses its default for that option.
domain_accounting.Accounting.schema_json(indent=2)

### Properties of a Data Model object

In [None]:
# Wrap the pydantic model in a DataTemplate.
# Note: if the model has required fields without defaults, printing this
# object will give a validation error.
accounting = utils.DataTemplate(template=model_accounting)

In [None]:
# Printing the template is expected to fail here; the exception is caught
# and shown on purpose because the error itself is the demonstration.
try:
    print(accounting)
except ValidationError as validation_error:
    # Error text followed by one blank line, then the explanatory note.
    print(validation_error, end='\n\n')
    print('All template functions are executing our pydantic model.')

## Basic round trip python datatype to pandas DataFrame

- in practice this never works,
  because pandas parses (re-types) the values

In [None]:
# List[dict]: two raw accounting records used as input for the round trips.
# The first record marks its missing values with np.nan, the second uses
# None for the text fields and np.nan for the amount.
data1 = [
    {
        'Belegdat.': '2018-09-30',
        'USt Text': np.nan,
        'BetragEUR': 1.06,
        'Belegnr.': 'DB315',
        'Jour. Dat.': '2018-10-09',
        'Sollkto': 1800,
        'Habenkto': 3631,
        'Status': np.nan,
        'Buchungstext': 'example 1',
        'Gegenkto': 1800,
        'USt %': 0,
        'USt Kto': 0,
    },
    {
        'Belegdat.': '2018-09-30',
        'USt Text': None,
        'BetragEUR': np.nan,
        'Belegnr.': 'DB315',
        'Jour. Dat.': '2018-10-09',
        'Sollkto': 1800,
        'Habenkto': 3631,
        'Status': None,
        'Buchungstext': 'example 2',
        'Gegenkto': 5923,
        'USt %': 0,
        'USt Kto': 0,
    },
]

In [None]:
# List[dict] -> pd.DataFrame
# The plain constructor is used on purpose: pandas decides the column
# dtypes on its own here, which is what the following cells inspect.
df1 = pd.DataFrame(data1)
df1

In [None]:
# Fill every missing value with the marker string 'i' to see how that
# changes the column dtypes (compare df1.dtypes with df_test.dtypes).
# The `downcast` keyword of DataFrame.fillna is deprecated in recent pandas
# releases, so dtype inference is requested explicitly via infer_objects()
# instead. For this frame the result is the same: the filled columns hold
# strings (or mixed values) and stay object dtype.
df_test = df1.fillna(value='i').infer_objects()

In [None]:
# Column dtypes of the frame built directly from the raw records.
df1.dtypes

In [None]:
# Column dtypes after the missing values were filled - compare with df1.dtypes.
df_test.dtypes

In [None]:
# pd.DataFrame -> List[dict]: one dict per row, keyed by column name.
data2 = df1.to_dict(orient='records')

In [None]:
# Symmetric difference of the second record before and after the DataFrame
# round trip: the (key, value) pairs that appear on only one side.
# The diff between data1 and data2 is only None -> np.nan
# (NOTE(review): NaN != NaN, so the 'BetragEUR' NaN pair may also be listed - confirm).
# Potentially there could be much more, but pydantic is built with that in mind:
# pydantic does parsing over validating.
data1[1].items() ^ data2[1].items()

## Round trip with our model

- this is more likely to work because pydantic parses the input into a typed output

In [None]:
# Re-create the template, this time with by_alias=True
# (presumably so the resulting columns use the field aliases that match
# the keys of data1 - verify against utils.DataTemplate).
accounting = utils.DataTemplate(
    template=model_accounting,
    by_alias=True,
)

# Build the DataFrame from the raw records through the template.
df_model = accounting.dataframe(data1)
# Left disabled by the author (would double BetragEUR when enabled):
#df_model.BetragEUR = df_model.BetragEUR*2

In [None]:
# These transforms are very much needed, otherwise the comparison is a mess.
# pydantic also lets us declare datetime.date, which is the right domain
# specification; for the comparison both frames are brought to pandas
# datetimes. Order: df_model first, then df1, each column in turn.
for frame in (df_model, df1):
    for date_column in ('Belegdat.', 'Jour. Dat.'):
        frame[date_column] = pd.to_datetime(frame[date_column])

In [None]:
# Compare the plain pandas frame with the frame built through the model
# (see the pandas docs on comparing DataFrames). check_dtype=False is passed
# so that differing column dtypes alone do not fail the comparison.
tm.assert_frame_equal(df1, df_model, check_dtype=False)

In [None]:
# Records of the plain pandas frame, for visual comparison with the model frame.
df1.to_dict(orient='records')

In [None]:
# Records of the model-built frame, for visual comparison with the plain frame.
df_model.to_dict(orient='records')