In [None]:
import numpy as np
import pandas as pd
import redis

from fastcounting import helper

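Run every cell exactly once, otherwise you will run into bugs: the walks write to Redis under freshly generated IDs, so executing them a second time stores the same journal again. Note that the cells defining the functions are further down and have to be executed before the cells that call them.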

In [None]:
# Accounting period to import; it is passed to helper.Helper().datafolder() to locate the data folder.
# NOTE(review): '13' presumably denotes a year-end closing period rather than a calendar month — confirm.
month = '2017-13'
# Redis client that the walk functions below use as a notebook global.
# decode_responses=True makes replies come back as str instead of bytes.
r = redis.Redis(**helper.Helper().rediscred, decode_responses=True)

In [None]:
# Main pipeline: locate the journal export(s) for `month` and load the journal.
files = find_batch_files(month)

# `dfinput` keeps the untouched import; later cells work on the copy `df`.
dfinput = read_lexware_journal(files)
df = dfinput.copy()

In [None]:
# Normalise the journal: dates become epoch-second strings, amounts become cents,
# missing account numbers become 0.0.
df = clean_lexware_journal(df)

In [None]:
# First pass: registers the batch and the general (per-transaction) records in Redis.
# files[0] is stored as the descriptive text of the batch.
first_walk(df, files[0])

In [None]:
# Forward fill the journal number so that rows without their own 'Nr.' inherit the
# number of the row above. This has to happen between the first and the second walk.
# The result is assigned back on purpose: calling ffill(inplace=True) on the column
# selection acts on a temporary Series and leaves `df` unchanged under pandas
# Copy-on-Write.
df['Nr.'] = df['Nr.'].ffill()

second_walk(df)

In [None]:
def find_batch_files(month):
    """
    Return the journal export files found in the data folder of ``month``.

    A file qualifies when its name starts with 'journal' (case-insensitive).
    The result is sorted: Path.iterdir() yields entries in arbitrary order, and
    callers pick ``files[0]``, so an unsorted list would make the imported file
    depend on the file system.
    """
    folder = helper.Helper().datafolder(month)
    return sorted(
        entry for entry in folder.iterdir()
        if entry.name.lower().startswith('journal')
    )

def read_lexware_journal(files, nrows=None):
    """
    Load the first file of ``files`` (a Lexware journal export) into a DataFrame.

    files -- sequence of paths; only ``files[0]`` is read
    nrows -- optional limit on the number of rows to read (None reads everything)

    The first row of the sheet is skipped and the three date columns are parsed as dates.
    """
    date_columns = ['Belegdat.', 'Buchdat.', 'Jour. Dat.']
    # NOTE(review): bom, sep, encoding and dayfirst look like read_csv options —
    # confirm that the installed pandas version accepts them in read_excel.
    reader_options = dict(
        bom=True, sep=';', encoding='latin-1', decimal=',', thousands='.',
        dayfirst=True, skiprows=1, parse_dates=date_columns, nrows=nrows)
    return pd.read_excel(files[0], **reader_options)

def clean_lexware_journal(df):
    """
    Return a cleaned copy of a raw Lexware journal frame; the input is not modified.

    - date columns are forward filled, converted to seconds since 1970-01-01
      and stored as strings
    - money columns are multiplied by 100 and rounded (cent amounts); NaN is kept
    - missing account numbers are replaced with 0.0
    """
    # Copy the frame that was passed in. Copying the notebook global `dfinput` here
    # would silently ignore the argument and fail outside the notebook.
    df = df.copy()

    epoch = pd.Timestamp("1970-01-01")
    one_second = pd.Timedelta('1s')
    for column in ['Belegdat.', 'Buchdat.', 'Jour. Dat.']:
        df[column] = df[column].ffill()  # this works because the data is sorted
        df[column] = (df[column] - epoch) // one_second
        df[column] = df[column].apply(str)

    # multiply all currency amounts to get integer-valued cent amounts
    for money_column in ['SollEUR', 'HabenEUR', 'USt H-EUR', 'USt-S EUR']:
        # no cast to an integer dtype because the NaNs are slightly meaningful
        df[money_column] = (df[money_column] * 100).round(0)

    dimensions = ['Sollkto', 'Habenkto', 'USt Kto-H', 'USt Kto-S']
    return df.fillna(value={dimension: 0.0 for dimension in dimensions})

In [None]:
def first_walk(df, batchtext):
    """
    First of two walks: register the batch and the general records in Redis.

    In a split multirow transaction only the first row carries the general
    information (and a value in 'Nr.'), so rows whose 'Nr.' is missing are
    skipped here. 'Nr.' must therefore not be forward filled before this walk.

    df        -- cleaned journal frame
    batchtext -- description stored with the batch (e.g. the path of the import file)

    Uses the notebook-level Redis client ``r``.
    """
    batchID = r.incr('next_batchID')  # rollback and diff functionality
    # str(): the notebook passes a pathlib.Path, which the redis client cannot encode
    r.hmset(f'batchID:{batchID}',
        {'text': str(batchtext)})
    for i in df.index:
        journal_nr = df.at[i, 'Nr.']
        if pd.isna(journal_nr):
            # continuation row of a split transaction: no general record, and no
            # temporary mapping stored under a NaN key
            continue
        # get unique id from database (thread-safe)
        generalID = r.incr('next_generalID')
        # create mapping for rollback if we only run first_walk
        r.sadd(f'batch:general:{batchID}', generalID)
        # create temporary mapping 'Nr.' -> generalID, it expires after 300 seconds
        r.set(journal_nr, generalID, ex=300)
        # store data in hash
        r.hmset(f'generalID:{generalID}',
            {'date': df.at[i, 'Belegdat.'],
            'jourdat': df.at[i, 'Jour. Dat.'],
            'buchdat': df.at[i, 'Buchdat.'],
            'status': df.at[i, 'Status'],
            'belegnr': df.at[i, 'Belegnr.']})

        
def atomic_to_redis(i, konto, betrag, kontenseite, ust=None, frame=None):
    """
    Store one atomic posting (one account, one amount) of journal row ``i`` in Redis.

    i           -- index label of the journal row
    konto       -- account number the amount is posted to
    betrag      -- amount of the posting
    kontenseite -- side of the account, 'Soll' or 'Haben'
    ust         -- unused, kept for backward compatibility
    frame       -- journal frame that row ``i`` belongs to; when omitted the
                   notebook global ``df`` is used (the previous behaviour)

    Uses the notebook-level Redis client ``r`` and relies on the temporary
    'Nr.' -> generalID mapping written by first_walk.
    """
    if frame is None:
        # legacy call without a frame: fall back to the notebook global
        frame = df
    # get unique id from database (thread-safe)
    atomicID = r.incr('next_atomicID')
    # get temporary mapping we created in the first walk
    generalID = r.get(frame.at[i, 'Nr.'])
    # the counter still holds the id handed out to the batch by first_walk
    batchID = r.get('next_batchID')
    # create a lookup set for all atomics in a batch
    r.sadd(f'batch:atomic:{batchID}', atomicID)
    # create stable mapping general:atomic
    r.sadd(f'general:atomic:{generalID}', atomicID)
    # create mapping accountID:atomicID
    r.sadd(f'account:atomic:{konto}', atomicID)
    # create date filter atomic:date (could think about splitting the key into years)
    r.zadd('atomic:date', {atomicID: int(frame.at[i, 'Belegdat.'])})
    # store data in hash + mapping atomic:general + mapping atomic:account
    r.hmset(f'atomicID:{atomicID}',
           {'generalID': generalID,
            'accountID': konto,
            'text': frame.at[i, 'Buchungstext'],
            'amount': betrag,
            'kontenseite': kontenseite,
            'batchID': batchID})


def second_walk(df):
    """
    Second and last walk, now we walk over every row and unpack up to 4 dimensions per row.
    There are 3 types of accounting transactions in this row based lexware export.
    1. automatic transaction, like ust payment on revenues
    2. split multirow transaction, like payment of import taxes and handling with dhl.
        important to note you can make split multirow transactions with duplicated
        accounts, that's why we can't use a dictionary here.
    3. standard account to account mapping
    + every combination from the above
    It comes in handy that split multirow transactions are separated in rows.

    The frame is handed on to atomic_to_redis explicitly, so the walk works on the
    frame it was given rather than on whatever the notebook global ``df`` holds.
    """
    # (account column, amount column, side, negate amount) in processing order
    dimensions = (
        ('Sollkto', 'SollEUR', 'Soll', True),
        ('Habenkto', 'HabenEUR', 'Haben', False),
        ('USt Kto-H', 'USt H-EUR', 'Haben', False),
        ('USt Kto-S', 'USt-S EUR', 'Soll', True),
    )
    for i in df.index:
        for account_column, amount_column, side, negate in dimensions:
            konto = df.at[i, account_column]
            if not konto:
                # 0.0 marks "no account in this dimension"
                continue
            betrag = df.at[i, amount_column]
            atomic_to_redis(i, konto, -betrag if negate else betrag, side, frame=df)