In [None]:
import pathlib as pl
import sys
from collections import OrderedDict
import json
import subprocess as sp

import pandas as pd
import numpy as np
import voluptuous as vt

sys.path.append("../")

import payulator as pu


%load_ext autoreload
%autoreload 2

# Configure

In [None]:
# Base directory for all records, located under the current user's home directory.
ROOT = pl.Path.home() / "merriweather"
# Directories that are searched for loans; each loan is expected to live in
# its own subdirectory (see collect_payment_schedules).
LOAN_DIRS = [
    ROOT / "loans" / "active",
    ROOT / "loans" / "finished",
]
# Sanity check: list the contents of the first loan directory.
%ls {LOAN_DIRS[0]}

# Define useful functions

In [None]:
def collect_payment_schedules(loan_dirs):
    """
    Given a list of directory paths (list of strings or Path objects),
    search every immediate subdirectory of every given directory for an
    entry named ``"parameters.json"``.
    Read each such file as a loan with :func:`payulator.read_loan`,
    and return a dictionary of the form
    loan code -> payment schedule, where the payment schedule is the
    ``"payment_schedule"`` item of ``loan.payments()``.

    Subdirectories whose parameters raise ``voluptuous.MultipleInvalid``
    are reported on standard output and skipped.
    Subdirectories are visited in sorted path order, so if two loans
    share a code, the one visited last deterministically wins.
    """
    d = {}
    for root in loan_dirs:
        # Coerce to Path so that plain strings work, as documented
        for p in sorted(pl.Path(root).iterdir()):
            if not p.is_dir():
                continue

            q = p / "parameters.json"
            if not q.exists():
                continue

            try:
                loan = pu.read_loan(q)
            except vt.MultipleInvalid as e:
                print(f"Skipping {p} because of an error in its JSON paremeters.")
                print(e)
                # Really skip: otherwise ``loan`` below is either unbound
                # or still refers to the previously read loan
                continue

            d[loan.code] = loan.payments()["payment_schedule"]

    return d

def compute_balances(loan_dirs, date):
    """
    Collect the payment schedules of all loans found by
    :func:`collect_payment_schedules` in the given loan directories,
    and compile their outstanding loan balances (principals) up to the
    given date.
    Return the results as a DataFrame, sorted by first payment date,
    with the columns:

    - ``"loan_code"``
    - ``"first_payment_date"``: first scheduled payment date of the loan
    - ``"last_payment_date"``: last scheduled payment date of the loan
    - ``"balance_date"``: ``date``, exactly as given
    - ``"balance"``: ``"ending_balance"`` of the last scheduled payment
      on or before ``date``; NaN if the loan has no payment scheduled
      by then
    - ``"total_balance"``: sum of all the (non-NaN) balances in the
      DataFrame, repeated on every row

    If no loans are found, return an empty DataFrame that still has
    these columns, so that column access on the result keeps working.
    """
    columns = [
        "loan_code",
        "first_payment_date",
        "last_payment_date",
        "balance_date",
        "balance",
    ]
    d = collect_payment_schedules(loan_dirs)
    if not d:
        return pd.DataFrame(columns=columns + ["total_balance"])

    rows = []
    for name, f in d.items():
        # Sort so that the label slice below is valid even when ``date``
        # is not itself a payment date and the schedule is unordered
        f = f.set_index("payment_date").sort_index()
        start_date = f.index[0]
        end_date = f.index[-1]
        s = f.loc[:date, "ending_balance"]
        b = s.iat[-1] if not s.empty else np.nan
        rows.append([name, start_date, end_date, date, b])

    return (
        pd.DataFrame(rows, columns=columns)
        .assign(total_balance=lambda x: x["balance"].sum())
        .sort_values("first_payment_date")
        .reset_index(drop=True)
    )

def compute_revenue(loan_dirs, start_date=None, end_date=None, freq="M"):
    """
    Gather every payment schedule found by
    :func:`collect_payment_schedules` in the given directories and
    hand them, together with the given start date, end date, and
    frequency, to :func:`payulator.aggregate_payment_schedules`.
    To the aggregate append the columns

    - ``"revenue"``: interest_payment + fee_payment
    - ``"revenue_cumsum"``: cumulative sum of revenue

    and return the resulting DataFrame.
    The aggregate must therefore contain the columns
    ``"interest_payment"`` and ``"fee_payment"``; any other columns
    (expected, but not checked here: ``"payment_date"``,
    ``"principal_payment"``, ``"total_payment"``, and their cumulative
    sums) are passed through untouched.

    If no payment schedules are found, return an empty DataFrame
    without columns.
    """
    schedules = collect_payment_schedules(loan_dirs)
    if len(schedules) == 0:
        return pd.DataFrame()

    aggregate = pu.aggregate_payment_schedules(
        schedules.values(),
        start_date=start_date,
        end_date=end_date,
        freq=freq,
    )
    with_revenue = aggregate.assign(
        revenue=aggregate["interest_payment"] + aggregate["fee_payment"]
    )
    return with_revenue.assign(revenue_cumsum=with_revenue["revenue"].cumsum())

def estimate_future_fund(loan_dirs, current_date, future_date, current_fund, 
  funding, funding_freq):
    """
    Project the size of the fund on a sequence of dates running from
    ``current_date`` to at most ``future_date`` in steps of
    ``funding_freq`` (anything accepted by
    ``pandas.tseries.frequencies.to_offset``).

    ``funding`` is the amount added to the fund at each step after the
    first; it is passed to ``pandas.Series``, so it can be a scalar or
    a sequence with one value per date.
    Loan payments are taken from :func:`compute_revenue`, computed
    at daily frequency for the loans in ``loan_dirs`` between the two
    dates.

    Return a DataFrame with the columns

    - ``"date"``
    - ``"start_fund"``: ``current_fund``
    - ``"funding_cumsum"``: cumulative funding added by that date
    - ``"loan_revenue_cumsum"``: the ``"total_payment_cumsum"`` value
      of the loans as of that date
    - ``"fund"``: start_fund + funding_cumsum + loan_revenue_cumsum

    """
    # Build date range from current date to future date using given funding freq.
    # Note that pandas.date_range does not do this exactly.
    step = pd.tseries.frequencies.to_offset(funding_freq)
    stop = pd.to_datetime(future_date)
    date = pd.to_datetime(current_date)
    rng = []
    while date <= stop:
        rng.append(date)
        date += step
    index = pd.DatetimeIndex(rng)

    loans = compute_revenue(loan_dirs, current_date, future_date, "D")
    if loans.empty:
        # No loans on record: nothing comes back into the fund
        loan_cumsum = pd.Series(0.0, index=index)
    else:
        # A cumulative sum on a date with no row equals its last known
        # value, or 0 before the first row; plain label lookup would
        # raise KeyError on such dates instead
        loan_cumsum = (
            loans.set_index("payment_date")["total_payment_cumsum"]
            .sort_index()
            .reindex(index, method="ffill")
            .fillna(0)
        )

    return (
        pd.DataFrame(index=index)
        .assign(start_fund=current_fund)
        # Shift so that no funding is counted on the current date itself
        .assign(funding=pd.Series(funding, index=index).shift().fillna(0))
        .assign(funding_cumsum=lambda x: x["funding"].cumsum())
        .assign(loan_revenue_cumsum=loan_cumsum)
        .assign(fund=lambda x: x["start_fund"] + x["funding_cumsum"] + x["loan_revenue_cumsum"])
        .drop(["funding"], axis=1)
        .rename_axis("date")
        .reset_index()
    )


# Compute revenue, costs, etc.

In [None]:
# Revenue table for 2021-04-01 through 2022-03-31 at compute_revenue's default frequency ("M").
compute_revenue(LOAN_DIRS, "2021-04-01", "2022-03-31")


In [None]:
# Summarize
d = {}

start_date = pd.to_datetime("2021-04-01")
end_date = pd.to_datetime("2022-03-31")
TAX_DIR = ROOT / "taxes" / f"{start_date:%Y%m%d}--{end_date:%Y%m%d}"
%ls {TAX_DIR}

d["revenue"] = (
    compute_revenue(LOAN_DIRS, start_date=f"{start_date:%Y-%m-%d}", end_date=f"{end_date:%Y-%m-%d}")
    .revenue
    .sum()
)

# Office expeneses can claim for home office, which occupies 24% of home.
# Download flat account transactions CSV from bank for this.
rent =  (
    pd.read_csv(TAX_DIR / "expenses_office.csv")
    .fillna("nada")
    .loc[lambda x: x["Memo/Description"].str.contains("from A Raichev", case=False)]
    .loc[lambda x: x["TP part"].str.contains("rent", case="False")]
    .loc[: , "Amount"]
    .sum()
    - 25*52  # Rent & utils - utils
)
display(rent)

# Use here the IRD square meter rate option described at 
# https://www.ird.govt.nz/income-tax/income-tax-for-businesses-and-organisations/types-of-business-expenses/using-your-home-for-your-business
# namely, expense = (yearly rent) * (office area i pay for)/(total area i pay for) + 43$ * (office area i pay for)
IRD_SMR = 43
d["expenses_office"] = rent * (11.34 / 26.025) + IRD_SMR * (11.34)

d["expenses_other"] = pd.read_csv(TAX_DIR / "expenses_other.csv").amount.sum() 
d["expenses"] = d["expenses_other"] + d["expenses_office"]
d["gross_profit"] = d["revenue"] - d["expenses"] 
f = (
    pd.DataFrame(d, index=[0])
    .round(0)
    .assign(
        tax=lambda x: 0.28 * x.gross_profit,
        net_profit=lambda x: x.gross_profit - x.tax,
        net_profit_alt=lambda x: x.revenue - x.expenses_other - x.tax,
        net_profit_ratio=lambda x: x.net_profit / x.revenue,
        net_profit_ratio_alt=lambda x: x.net_profit_alt / x.revenue,
    )
)
f.T

# Compute assets

In [None]:
# Loan balances
d = {}

# NOTE(review): this balance date falls a year before the end of the tax
# year summarized above (2022-03-31) -- confirm that it is intended
date = "2021-03-31"
loan_balances = compute_balances(LOAN_DIRS, date)
display(loan_balances)

d["outstanding_balances"] = loan_balances.balance.sum()
d["cash"] = 18_685  # entered by hand; update together with the balance date

# Compute other assets excluding depreciation:
# sum the amounts flagged as assets in the other-expenses file of every tax year.
# Globbing skips stray files that sit directly in the taxes directory.
assets = 0 
tax_dir = ROOT / "taxes"
for path in sorted(tax_dir.glob("*/expenses_other.csv")):
    asset = (
        pd.read_csv(path)
        # Blank is_asset cells are read as NaN; treat them as "not an asset"
        # instead of letting them break the string match or the row mask
        .loc[lambda x: x["is_asset"].fillna("").astype(str).str.contains("yes", case=False)]
        .amount
        .sum()
    )
    assets += asset

d["non_loan_assets"] = assets

f = (
    pd.DataFrame(d, index=[0])
    .round(0)
    .assign(total_assets=lambda x: x.sum(axis=1))
)
f.T