In [3]:
import pathlib as pl
import sys
import json
import subprocess as sp

import pandas as pd
import numpy as np
import voluptuous as vt

sys.path.append("../")

import payulator as pu


%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Configure

In [4]:
# Root directory of the records used throughout this notebook
ROOT = pl.Path.home() / "merriweather"
# Directories that are searched for loans; each immediate subdirectory is
# treated as one loan by ``collect_payment_schedules`` below
LOAN_DIRS = [
    ROOT / "loans" / "active",
    ROOT / "loans" / "finished",
]
# List the contents of the first loan directory as a quick sanity check
%ls {LOAN_DIRS[0]}

[0m[01;34mC-Vallyon-20190604[0m/  [01;34mEBT-20210601[0m/  [01;34mEBT-20220706[0m/             [01;34mResilio-20190514[0m/
[01;34mC-Vallyon-20220201[0m/  [01;34mEBT-20220412[0m/  [01;34mHaldane-Willis-20200618[0m/  [01;34mS-Adli-20220119[0m/


# Define useful functions

In [22]:
def collect_payment_schedules(loan_dirs: list[pl.Path]) -> dict[str, pd.DataFrame]:
    """
    Given a list of directory paths (list of strings or Path objects),
    search every immediate subdirectory of every given directory for a file
    named ``"parameters.json"``.
    Read each such file with :func:`payulator.read_loan`, and return
    a dictionary of the form loan code -> payment schedule, where the
    payment schedule is the ``"payment_schedule"`` item of the result of
    the loan's ``payments()`` method.

    If a loan's parameters fail validation (``voluptuous.MultipleInvalid``),
    then print a message and skip that loan.
    Return an empty dictionary if no loans are found.
    """
    d = {}
    for root in loan_dirs:
        # Coerce to Path so that plain strings work too
        for p in pl.Path(root).iterdir():
            if not p.is_dir():
                continue
            for q in p.glob("parameters.json"):
                try:
                    loan = pu.read_loan(q)
                except vt.MultipleInvalid as e:
                    print(f"Skipping {p} because of an error in its JSON paremeters.")
                    print(e)
                    # Really skip: otherwise ``loan`` below is either unbound
                    # or left over from the previous loan
                    continue

                d[loan.code] = loan.payments()["payment_schedule"]

    return d

def compute_balances(loan_dirs: list[pl.Path], date: str) -> pd.DataFrame:
    """
    Collect the payment schedules of all loans found under the given
    loan directories (via :func:`collect_payment_schedules`) and look up
    each loan's outstanding balance (principal) as of the given date.
    Return a DataFrame with one row per loan, sorted by first payment date,
    with the columns

    - ``"loan_code"``
    - ``"first_payment_date"``: first scheduled payment date of the loan
    - ``"last_payment_date"``: last scheduled payment date of the loan
    - ``"balance_date"``: ``date``
    - ``"balance"``: the ``"ending_balance"`` of the last scheduled payment
      on or before ``date``; NaN if the loan has no such payment
    - ``"total_balance"``: sum of all the balances in the DataFrame,
      repeated on every row

    Return an empty DataFrame if no payment schedules are found.
    """
    schedules = collect_payment_schedules(loan_dirs)
    if not schedules:
        return pd.DataFrame()

    records = []
    for loan_code, schedule in schedules.items():
        by_date = schedule.set_index("payment_date")
        first_date, last_date = by_date.index[0], by_date.index[-1]
        # Ending balances of all payments scheduled up to and including ``date``
        to_date = by_date.loc[:date, "ending_balance"]
        balance = np.nan if to_date.empty else to_date.iat[-1]
        records.append((loan_code, first_date, last_date, date, balance))

    columns = [
        "loan_code",
        "first_payment_date",
        "last_payment_date",
        "balance_date",
        "balance",
    ]
    balances = pd.DataFrame(records, columns=columns)
    balances = balances.assign(total_balance=balances["balance"].sum())
    return balances.sort_values("first_payment_date").reset_index(drop=True)

def compute_revenue(
    loan_dirs: list[pl.Path], 
    start_date: str=None, 
    end_date: str=None, 
    freq: str="M",
) -> pd.DataFrame:
    """
    Collect all the payment schedules in the given loan directories
    via :func:`collect_payment_schedules`, and aggregate them with
    :func:`payulator.aggregate_payment_schedules`, passing along the
    given start date, end date, and frequency.
    The schedules are assumed to have the format that function expects.

    Return the aggregate with two extra columns appended:

    - ``"revenue"``: interest_payment + fee_payment
    - ``"revenue_cumsum"``: cumulative sum of revenue

    All the other columns, such as ``"payment_date"``,
    ``"principal_payment"``, ``"interest_payment"``, ``"fee_payment"``,
    ``"total_payment"``, and their cumulative sums, come from
    :func:`payulator.aggregate_payment_schedules`.

    Return an empty DataFrame if no payment schedules are found.
    """
    schedules = collect_payment_schedules(loan_dirs)
    if not schedules:
        return pd.DataFrame()

    aggregate = pu.aggregate_payment_schedules(
        schedules.values(),
        start_date=start_date,
        end_date=end_date,
        freq=freq,
    )
    # Revenue is everything received that is not repayment of principal
    aggregate = aggregate.assign(
        revenue=aggregate["interest_payment"] + aggregate["fee_payment"]
    )
    return aggregate.assign(revenue_cumsum=aggregate["revenue"].cumsum())

def estimate_future_fund(
    loan_dirs, 
    current_date, 
    future_date, 
    current_fund, 
    funding, 
    funding_freq,
) -> pd.DataFrame:
    """
    Estimate the size of the fund on a sequence of dates running from
    ``current_date`` to at most ``future_date`` in steps of ``funding_freq``
    (a pandas offset alias such as ``"7D"``).

    The fund on a date is estimated as ``current_fund``, plus ``funding``
    for every step taken after ``current_date``, plus the cumulative total
    loan payments (principal included) scheduled from ``current_date`` up
    to that date, as computed by :func:`compute_revenue` at daily frequency
    from the loans in ``loan_dirs``.

    Return a DataFrame with the columns

    - ``"date"``
    - ``"start_fund"``: ``current_fund``
    - ``"funding_cumsum"``: cumulative funding added up to the date
    - ``"loan_revenue_cumsum"``: cumulative total loan payments up to the date
    - ``"fund"``: sum of the previous three columns

    """
    # Build date range from current date to future date using given funding freq.
    # Note that pandas.date_range does not do this exactly.
    last_date = pd.to_datetime(future_date)
    step = pd.tseries.frequencies.to_offset(funding_freq)
    date = pd.to_datetime(current_date)
    rng = []
    while date <= last_date:
        rng.append(date)
        date += step

    # NOTE(review): the lookup by date below needs every date in ``rng`` to be
    # present in the daily aggregate; confirm against payulator's output
    loans = compute_revenue(loan_dirs, current_date, future_date, "D").set_index("payment_date")
    return (
        pd.DataFrame(index=rng)
        .assign(start_fund = current_fund)
        # Shift so that no funding is added on the current date itself
        .assign(funding = pd.Series(funding, index=rng).shift().fillna(0))
        .assign(funding_cumsum = lambda x: x["funding"].cumsum())
        .assign(loan_revenue_cumsum = lambda x: loans.loc[x.index, "total_payment_cumsum"])
        .assign(fund = lambda x: x["start_fund"] + x["funding_cumsum"] + x["loan_revenue_cumsum"])
        .drop(["funding"], axis=1)
        .rename_axis("date")
        .reset_index()
    )

def get_rent_etc(
    money_log_dir, 
    start_date: str=None, 
    end_date: str=None,
) -> pd.DataFrame:
    """
    Read every CSV file matching ``money_log*.csv`` in the directory
    ``money_log_dir`` and stack them into one DataFrame.
    Keep only the transactions whose description is exactly
    "rent & utilities" and whose date lies between the given start and
    end dates (inclusive; a bound of ``None`` is not applied).
    Return the result with its ``"date"`` column formatted as
    YYYY-MM-DD strings.
    """
    log_dir = pl.Path(money_log_dir)
    # Files are expected to be named like money_log_<date 1>--<date 2>.csv
    logs = [
        pd.read_csv(path, parse_dates=["date"])
        for path in log_dir.glob("money_log*.csv")
    ]
    transactions = pd.concat(logs)
    rent = transactions[transactions["description"] == "rent & utilities"].copy()

    if start_date is not None:
        rent = rent[rent["date"] >= pd.to_datetime(start_date)].copy()
    if end_date is not None:
        rent = rent[rent["date"] <= pd.to_datetime(end_date)].copy()

    return rent.assign(date=rent["date"].dt.strftime("%Y-%m-%d"))
                     

# Compute revenue, costs, etc.

In [30]:
# Monthly loan revenue for the tax year 2021-04-01 to 2022-03-31
compute_revenue(LOAN_DIRS, start_date="2021-04-01", end_date="2022-03-31")


  pd.concat(payment_schedules)


Unnamed: 0,payment_date,principal_payment,interest_payment,fee_payment,total_payment,principal_payment_cumsum,interest_payment_cumsum,fee_payment_cumsum,total_payment_cumsum,revenue,revenue_cumsum
0,2021-04-30,62064.2,1335.55,0,63399.75,62064.2,1335.55,0,63399.75,1335.55,1335.55
1,2021-05-31,2081.31,818.46,0,2899.77,64145.51,2154.01,0,66299.52,818.46,2154.01
2,2021-06-30,2098.53,1301.21,0,3399.74,66244.04,3455.22,0,69699.26,1301.21,3455.22
3,2021-07-31,2115.92,1700.51,0,3816.43,68359.96,5155.73,0,73515.69,1700.51,5155.73
4,2021-08-31,2133.44,1682.98,0,3816.42,70493.4,6838.71,0,77332.11,1682.98,6838.71
5,2021-09-30,2151.11,1665.32,0,3816.43,72644.51,8504.03,0,81148.54,1665.32,8504.03
6,2021-10-31,2168.93,1647.5,0,3816.43,74813.44,10151.53,0,84964.97,1647.5,10151.53
7,2021-11-30,2186.89,1629.53,0,3816.42,77000.33,11781.06,0,88781.39,1629.53,11781.06
8,2021-12-31,2205.01,1611.42,0,3816.43,79205.34,13392.48,0,92597.82,1611.42,13392.48
9,2022-01-31,2223.27,1593.16,0,3816.43,81428.61,14985.64,0,96414.25,1593.16,14985.64


In [31]:
# Compute rents and pc of home
rents = get_rent_etc(
    money_log_dir=pl.Path.home() / "personal_finance/data",
    start_date="2021-04-01",
    end_date="2022-03-31",
)
display(rents)

# Amounts in the money log are negative, so negate the sums to get costs.
# The dates are YYYY-MM-DD strings, so string comparison orders them correctly.
# The last Herb rent (286) and electricity (41.61) payments are moved from
# The Smokes' total to The Herb's total.
herb_rent_etc = (
    -rents.loc[rents["date"] <= "2022-01-13", "amount"].sum() + 286 + 41.61
)
smokes_rent_etc = (
    -rents.loc[rents["date"] >= "2022-01-20", "amount"].sum() - 286 - 41.61
)

# One record per flat: name, rent etc., office area i pay for, total area i pay for
rents_etc = [
    {
        "flat_name": flat_name,
        "rent_etc": rent_etc,
        "office_area": office_area,
        "total_area": total_area,
    }
    for flat_name, rent_etc, office_area, total_area in [
        ("The Herb", herb_rent_etc, 11.34, 26.025),
        ("The Smokes", smokes_rent_etc, 11.34, 26.025),
    ]
]
display(rents_etc)

Unnamed: 0,date,amount,description,category,comment
8,2021-04-08,-292.5,rent & utilities,housing,repeats weekly
11,2021-04-15,-292.5,rent & utilities,housing,repeats weekly
16,2021-04-22,-292.5,rent & utilities,housing,repeats weekly
18,2021-04-29,-292.5,rent & utilities,housing,repeats weekly
29,2021-05-06,-292.5,rent & utilities,housing,repeats weekly
32,2021-05-13,-292.5,rent & utilities,housing,repeats weekly
42,2021-05-20,-292.5,rent & utilities,housing,repeats weekly
48,2021-05-27,-292.5,rent & utilities,housing,repeats weekly
56,2021-06-03,-292.5,rent & utilities,housing,repeats weekly
60,2021-06-10,-292.5,rent & utilities,housing,repeats weekly


[{'flat_name': 'The Herb',
  'rent_etc': 17517.61,
  'office_area': 11.34,
  'total_area': 26.025},
 {'flat_name': 'The Smokes',
  'rent_etc': 2683.71,
  'office_area': 11.34,
  'total_area': 26.025}]

In [34]:
# Summarize revenue, expenses, tax, and profit for the tax year
d = {}

start_date = pd.to_datetime("2021-04-01")
end_date = pd.to_datetime("2022-03-31")
# Tax records for the year live in a directory named <start date>--<end date>
TAX_DIR = ROOT / "taxes" / f"{start_date:%Y%m%d}--{end_date:%Y%m%d}"
%ls {TAX_DIR}

# Revenue = sum of interest and fee payments over the tax year
d["revenue"] = (
    compute_revenue(LOAN_DIRS, start_date=f"{start_date:%Y-%m-%d}", end_date=f"{end_date:%Y-%m-%d}")
    .revenue
    .sum()
)

# Use here the IRD square meter rate option described at 
# https://www.ird.govt.nz/income-tax/income-tax-for-businesses-and-organisations/types-of-business-expenses/using-your-home-for-your-business
# namely, expense = (yearly rent) * (office area i pay for)/(total area i pay for) + IRD_SMR * (office area i pay for)
# NOTE(review): the IRD_SMR term is added once per flat in the loop below,
# so it is counted twice for the year; confirm whether it should be pro-rated.
IRD_SMR = 44.75  # in dollars per square meter
eo = 0
for r in rents_etc:
    eo += (
        r["rent_etc"] * (r["office_area"] / r["total_area"])
        + IRD_SMR * r["office_area"]
    )
d["expenses_office"] = eo

d["expenses_other"] = pd.read_csv(TAX_DIR / "expenses_other.csv").amount.sum() 
d["expenses"] = d["expenses_other"] + d["expenses_office"]
d["gross_profit"] = d["revenue"] - d["expenses"] 
# Round to whole dollars first, then derive tax and profit from the rounded figures.
# Tax is a flat 28% of gross profit (NOTE(review): confirm the rate for the year).
# The "_alt" figures leave the office expenses out of the costs.
f = (
    pd.DataFrame(d, index=[0])
    .round(0)
    .assign(
        tax=lambda x: 0.28 * x.gross_profit,
        net_profit=lambda x: x.gross_profit - x.tax,
        net_profit_alt=lambda x: x.revenue - x.expenses_other - x.tax,
        net_profit_ratio=lambda x: x.net_profit / x.revenue,
        net_profit_ratio_alt=lambda x: x.net_profit_alt / x.revenue,
    )
)
f.T

[0m[01;34mexpense_receipts[0m/  expenses_office.csv  expenses_other.csv


  pd.concat(payment_schedules)


Unnamed: 0,0
revenue,18758.0
expenses_office,9778.0
expenses_other,2763.0
expenses,12541.0
gross_profit,6217.0
tax,1740.76
net_profit,4476.24
net_profit_alt,14254.24
net_profit_ratio,0.238631
net_profit_ratio_alt,0.759902


# Compute assets

In [None]:
# Loan balances
d = {}

# NOTE(review): this date is one year before the end of the tax year
# summarized above (2022-03-31); confirm it is the intended balance date.
date = "2021-03-31"
loan_balances = compute_balances(LOAN_DIRS, date)
display(loan_balances)

# compute_balances returns an empty, column-less DataFrame when no loans are found
d["outstanding_balances"] = (
    loan_balances["balance"].sum() if "balance" in loan_balances.columns else 0
)
d["cash"] = 18_685

# Compute other assets excluding depreciation:
# sum the amounts flagged as assets in every tax year's expenses_other.csv
tax_dir = ROOT / "taxes"
assets = 0
# Globbing skips stray files directly under tax_dir, which iterdir() would choke on
for path in sorted(tax_dir.glob("*/expenses_other.csv")):
    assets += (
        pd.read_csv(path)
        # Blank is_asset cells are read as NaN; count them as "not an asset"
        # instead of letting them break the boolean mask
        .loc[lambda x: x.is_asset.astype(str).str.contains("yes", case=False, na=False)]
        .amount
        .sum()
    )

d["non_loan_assets"] = assets

f = (
    pd.DataFrame(d, index=[0])
    .round(0)
    .assign(total_assets=lambda x: x.sum(axis=1))
)
f.T