In [1]:
import datetime
import pandas as pd
import pandas.testing as tm
import pydantic
from pydantic import ValidationError
import numpy as np

from waipawama import domain_accounting, utils

## Learning
- Problems will arise with type conversions
- np.nan and None are especially hard to handle
- How can we make it easier?

Good things:
- pydantic has more datatypes -> better for the domain
- pydantic models can be rich, going beyond plain data definitions
- \+ the stuff we already know

Be smart about what we don't do.

### Import domain model and template

In [2]:
# Pydantic data model: bind `Accounting` from `domain_accounting` to a local
# name so it can be passed to the template helper as `template=`.
model_accounting = domain_accounting.Accounting

### Properties of a Data Model object

In [3]:
# Wrap the model in a DataTemplate.
# Printing this object executes the pydantic model, so if the model has
# required fields without defaults the print raises a ValidationError.
accounting = utils.DataTemplate(template=model_accounting)

In [4]:
# Printing the template is expected to fail validation here; show pydantic's
# error report instead of letting the cell end in a traceback.
try:
    print(accounting)
except ValidationError as validation_error:
    # Error report, one blank line, then the takeaway message.
    print(validation_error, end='\n\n')
    print('All template functions are executing our pydantic model.')

8 validation errors for Accounting
Belegdat.
  field required (type=value_error.missing)
BetragEUR
  field required (type=value_error.missing)
Belegnr.
  field required (type=value_error.missing)
Jour. Dat.
  field required (type=value_error.missing)
Sollkto
  field required (type=value_error.missing)
Habenkto
  field required (type=value_error.missing)
Buchungstext
  field required (type=value_error.missing)
Gegenkto
  field required (type=value_error.missing)

All template functions are executing our pydantic model.


## Basic round trip python datatype to pandas DataFrame

- in practice this never works,
  because pandas parses the values it is given

In [27]:
# List[dict]: two raw records that are identical except for the
# 'Buchungstext' and 'Gegenkto' values.
def _record(buchungstext, gegenkto):
    """Return one raw record as a plain dict.

    The key order is kept fixed because it determines the column order of
    any DataFrame built from these records.
    """
    return {
        'Belegdat.': '2018-09-30',
        'USt Text': None,
        'BetragEUR': 1.06,
        'Belegnr.': 'DB315',
        'Jour. Dat.': '2018-10-09',
        'Sollkto': 1800,
        'Habenkto': 3631,
        'Status': None,
        'Buchungstext': buchungstext,
        'Gegenkto': gegenkto,
        'USt %': 0,
        'USt Kto': 0,
    }


data1 = [_record('example 1', 1800), _record('example 2', 5923)]

In [30]:
# List[dict] -> pd.DataFrame: one row per dict, one column per key.
# The bare `df1` on the last line renders the frame as the cell output.
df1 = pd.DataFrame(data1)
df1

Unnamed: 0,Belegdat.,USt Text,BetragEUR,Belegnr.,Jour. Dat.,Sollkto,Habenkto,Status,Buchungstext,Gegenkto,USt %,USt Kto
0,2018-09-30,,1.06,DB315,2018-10-09,1800,3631,,example 1,1800,0,0
1,2018-09-30,,1.06,DB315,2018-10-09,1800,3631,,example 2,5923,0,0


In [32]:
# pd.DataFrame -> List[dict]: the 'records' orientation returns one dict per
# row, i.e. the same shape as the input `data1`.
data2 = df1.to_dict('records')

In [33]:
# Symmetric difference of the (key, value) pairs of the second record before
# (data1) and after (data2) the pandas round trip: an empty set means the
# record came back unchanged.
# A typical difference would be None -> np.nan, and potentially much more,
# but pydantic is built with that in mind:
# pydantic favours parsing over validating.
data1[1].items() ^ data2[1].items()

set()

## Round trip with our model

- this is more likely to work because pydantic parses the input into a typed output

In [37]:
# Build the template again, this time with by_alias=True, and let it turn the
# raw records into a DataFrame.
# NOTE(review): by_alias=True presumably makes the output use the field
# aliases (e.g. 'Belegdat.') as column names — confirm in utils.DataTemplate.
accounting = utils.DataTemplate(template=model_accounting, by_alias=True)

df_model = accounting.dataframe(data1)
# Leftover experiment, kept disabled:
#df_model.BetragEUR = df_model.BetragEUR*2

In [38]:
# Parse both date columns into pandas datetimes on both frames so that they
# hold the same kind of values; without these transforms it is a mess.
# pydantic also lets us declare these fields as datetime.date, which is the
# right domain specification.
for frame in (df_model, df1):
    for date_column in ('Belegdat.', 'Jour. Dat.'):
        frame[date_column] = pd.to_datetime(frame[date_column])

In [39]:
# Compare the plain-pandas frame with the model-built frame; this raises an
# AssertionError if they differ. check_dtype=False skips the check that the
# column dtypes are identical.
# See the pandas docs on comparing DataFrames (pandas.testing).
tm.assert_frame_equal(df1, df_model, check_dtype=False)

In [40]:
# Records of the plain-pandas frame, shown for visual comparison with the
# records of the model-built frame.
df1.to_dict('records')

[{'Belegdat.': Timestamp('2018-09-30 00:00:00'),
  'USt Text': None,
  'BetragEUR': 1.06,
  'Belegnr.': 'DB315',
  'Jour. Dat.': Timestamp('2018-10-09 00:00:00'),
  'Sollkto': 1800,
  'Habenkto': 3631,
  'Status': None,
  'Buchungstext': 'example 1',
  'Gegenkto': 1800,
  'USt %': 0,
  'USt Kto': 0},
 {'Belegdat.': Timestamp('2018-09-30 00:00:00'),
  'USt Text': None,
  'BetragEUR': 1.06,
  'Belegnr.': 'DB315',
  'Jour. Dat.': Timestamp('2018-10-09 00:00:00'),
  'Sollkto': 1800,
  'Habenkto': 3631,
  'Status': None,
  'Buchungstext': 'example 2',
  'Gegenkto': 5923,
  'USt %': 0,
  'USt Kto': 0}]

In [41]:
# Records of the model-built frame, shown for visual comparison with the
# records of the plain-pandas frame.
df_model.to_dict('records')

[{'Belegdat.': Timestamp('2018-09-30 00:00:00'),
  'USt Text': None,
  'BetragEUR': 1.06,
  'Belegnr.': 'DB315',
  'Jour. Dat.': Timestamp('2018-10-09 00:00:00'),
  'Sollkto': 1800,
  'Habenkto': 3631,
  'Status': None,
  'Buchungstext': 'example 1',
  'Gegenkto': 1800,
  'USt %': 0,
  'USt Kto': 0},
 {'Belegdat.': Timestamp('2018-09-30 00:00:00'),
  'USt Text': None,
  'BetragEUR': 1.06,
  'Belegnr.': 'DB315',
  'Jour. Dat.': Timestamp('2018-10-09 00:00:00'),
  'Sollkto': 1800,
  'Habenkto': 3631,
  'Status': None,
  'Buchungstext': 'example 2',
  'Gegenkto': 5923,
  'USt %': 0,
  'USt Kto': 0}]