### Code for parsers

In [1]:
import re

Text sample from a Journal

In [2]:
# Sample journal lines for trying out the comment-splitting regex: full-line
# comments, lines with and without a trailing comment, and indented lines.
lines = [
    '# Groceries',                                 # whole line is a '#' comment
    '; account equity',                            # whole line is a ';' comment
    '2024-02-02 paycheck',                         # no comment
    '2024-01-07  Safeway ; weekly groceries',      # trailing ';' comment
    '    income:yoyodyne      $5,000.00',          # indented line, no comment
    '    income:yoyodyne      $5,000.00  ; Feb',   # indented line, trailing comment
]

In [3]:
# Separate each journal line into the text before the comment marker and the
# comment itself. Split only at the first ';' or '#' (maxsplit=1) so that any
# later ';' or '#' stays inside the comment text instead of breaking it into
# several fragments; `comment` is therefore a list of zero or one strings.
for line in lines:
    cmnd, *comment = re.split(r'[;#]', line, maxsplit=1)
    print(cmnd, comment)


 [' Groceries']
 [' account equity']
2024-02-02 paycheck []
2024-01-07  Safeway  [' weekly groceries']
    income:yoyodyne      $5,000.00 []
    income:yoyodyne      $5,000.00   [' Feb']


Make sure `re.sub` strips the characters we don't want (`,` and `$`) out of an amount, so that what is left can be passed to `float`.

In [4]:
# Sample amount strings: plain and negative values, with and without a '$'
# (placed before or after the minus sign), and with a thousands separator.
amounts = [
    '123.45',
    '$123.45',
    '-123.45',
    '$-123.45',
    '-$123.45',
    '9,123.45',
    '$9,123.45',
    '-9,123.45',
    '$-9,123.45',
    '-$9,123.45',    
]

In [5]:
# Remove every comma and dollar sign from each sample, then show that float()
# accepts what is left.
for raw in amounts:
    print(float(re.sub(r'[,$]', '', raw)))


123.45
123.45
-123.45
-123.45
-123.45
9123.45
9123.45
-9123.45
-9123.45
-9123.45


In [6]:
s = ' type: A'

In [7]:
# Three groups: (1) any text before 'type: ' (lazy match), (2) the single word
# character that follows 'type: ', (3) everything after that character.
m = re.search(r'(.*?)type: (\w)(.*)', s)

In [8]:
# The input with the 'type: X' field cut out: the text before it plus the text after it.
m[1] + m[3]

' '

In [9]:
m[2]

'A'

### TOML

We're using TOML (a JSON alternative) for configuration files.

In [10]:
import tomllib
from pathlib import Path

In [11]:
p = Path.home() / 'Personal/Finances/dex.toml'

In [12]:
p.is_file()

True

In [13]:
# Parse the config file into a dict; the file is opened in binary mode for tomllib.load.
with p.open('rb') as toml_file:
    res = tomllib.load(toml_file)

In [14]:
res

{'terminology': {'cname': 'credit', 'dname': 'debit'},
 'csv': {'occu': {'description': 'rec["Description"] + rec["Extended Description"]',
   'date': 'rec["Posting Date"]',
   'amount': 'abs(float(rec["Amount"]))',
   'credit': 'rec["Amount"].startswith("-")'},
  'chase': {'description': 'rec["Description"]',
   'date': 'rec["Post Date"]',
   'amount': 'abs(float(rec["Amount"]))',
   'column': 'rec["Amount"].startswith("-")'},
  'citi': {'description': 'desc',
   'date': 'date',
   'amount': 'amount',
   'column': 'column'}}}

What we learned:
* each section in the TOML document is a key in the resulting dict
* if a section has subsections, the section name is associated with another dictionary whose keys are the subsection names (minus the section-name prefix)

In [15]:
from collections import namedtuple

In [16]:
# Record type with one field per key of the 'chase' and 'citi' entries under
# 'csv' in the loaded config.
# NOTE(review): the loaded 'occu' entry has a 'credit' key where 'chase' and
# 'citi' have 'column' -- confirm which name is intended before building
# ColMap instances from it.
ColMap = namedtuple('ColMap', ['description', 'date', 'amount', 'column'])

In [17]:
res

{'terminology': {'cname': 'credit', 'dname': 'debit'},
 'csv': {'occu': {'description': 'rec["Description"] + rec["Extended Description"]',
   'date': 'rec["Posting Date"]',
   'amount': 'abs(float(rec["Amount"]))',
   'credit': 'rec["Amount"].startswith("-")'},
  'chase': {'description': 'rec["Description"]',
   'date': 'rec["Post Date"]',
   'amount': 'abs(float(rec["Amount"]))',
   'column': 'rec["Amount"].startswith("-")'},
  'citi': {'description': 'desc',
   'date': 'date',
   'amount': 'amount',
   'column': 'column'}}}

In [19]:
from dexter.config import Config

In [20]:
# Initialise Config from the same file that was parsed with tomllib earlier in
# this notebook. Reuse `p` (built from Path.home()) rather than repeating a
# user-specific absolute path, so the notebook runs under any home directory.
# The path is converted to str because a string path is what was passed here before.
Config.init(str(p))

In [21]:
Config.cname

'credit'

In [22]:
Config.colmaps

{'occu': {'description': <function <lambda>(rec)>,
  'date': <function <lambda>(rec)>,
  'amount': <function <lambda>(rec)>,
  'credit': <function <lambda>(rec)>},
 'chase': {'description': <function <lambda>(rec)>,
  'date': <function <lambda>(rec)>,
  'amount': <function <lambda>(rec)>,
  'column': <function <lambda>(rec)>},
 'citi': {'description': <function <lambda>(rec)>,
  'date': <function <lambda>(rec)>,
  'amount': <function <lambda>(rec)>,
  'column': <function <lambda>(rec)>}}

### Colmaps

In [23]:
rec = {'amount': -123, 'description': 'aloha'}

In [24]:
# Evaluate a column expression with `rec` visible only under the name `dct`.
# Globals are an empty dict (Python inserts __builtins__ itself, so abs() still
# resolves). The namespaces are passed positionally -- globals first, then
# locals -- because eval() accepts them as keywords only from Python 3.13 on.
# The expression is a plain string: it has no placeholders, so no f-prefix.
eval('abs(dct["amount"])', {}, {'dct': rec})

123

In [25]:
import os

In [26]:
os.getenv('HOME')

'/Users/conery'

In [27]:
eval('os.getenv("HOME")')

'/Users/conery'

In [28]:
# Presumably left commented out because it fails: with empty globals and only
# `dct` in locals, the name `os` cannot be resolved inside the expression, so
# the call raises NameError (unlike the unrestricted eval in the previous cell).
# eval('os.getenv("HOME")', locals = {'dct': rec}, globals={})

In [29]:
lambda dct: abs(dct['amount'])

<function __main__.<lambda>(dct)>

In [30]:
# Callable form of the same computation as the eval'd 'abs(dct["amount"])'
# string: takes the record as `dct` and returns the absolute amount.
f = lambda dct: abs(dct['amount'])

In [31]:
f(rec)

123

In [32]:
# Same restricted evaluation, this time calling a string method on a record
# field. Namespaces are passed positionally (globals, then locals) because
# eval() accepts them as keywords only from Python 3.13 on; the expression has
# no placeholders, so it is a plain string rather than an f-string.
eval('dct["description"].capitalize()', {}, {'dct': rec})

'Aloha'

In [36]:
s = '; a budget: aloha:'

In [37]:
re.findall(r'\w+:', s)

['budget:', 'aloha:']

In [38]:
# Turn every 'word:' match in `s` into a '#word' tag by dropping the trailing colon.
['#' + match[:-1] for match in re.findall(r'\w+:', s)]

['#budget', '#aloha']