In [7]:
import argparse
from pathlib import Path
import os
from datetime import datetime
from typing import Dict, Optional

import numpy_financial as npf
import pandas as pd
import nasdaqdatalink as ndl

In [8]:
# Tickers treated as cash equivalents. load_transactions() drops rows whose
# "Ticker" is in this set unless it is called with ignore_cash_equivalents=False.
CASH_EQUIVALENTS = {
    "FDRXX",
    "FGXX",
    "QACDS",
    "SNAXX",
    "SNVXX",
    "SPAXX",
    "SPRXX",
    "SWVXX",
    "VMFXX",
    "VMMXX",
    "VMSXX",
}

# Category name -> raw "Type" codes that belong to that category.
# consolidate_transaction_types() rewrites each raw code to the upper-cased
# category name (e.g. "LIQ" becomes "SELL").
TRANSACTION_TYPES = {
    "buy": {"BUY"},
    "sell": {"LIQ", "SELL"},
    "deposit": {"BNK", "DEP", "DEPOSIT"},
    "withdrawal": {"WDL", "WITHDRAWAL"},
    "dividend": {
        "CASH DIVIDEND",
        "CASHDIVIDEND",
        "DBS",
        "DBT",
        "DIV",
        "DIVIDEND",
    },
    "interest": {"INTEREST"},
    "reinvest": {"REINVEST"},
    "split": {"SPLIT", "SPLT", "STK SPLT"},
}

In [9]:
def load_transactions(csv_path: str, ignore_cash_equivalents: bool = True) -> pd.DataFrame:
    """Load every ``*.csv`` in a directory into one normalized transaction table.

    Args:
        csv_path: Path of the *directory* that holds the CSV exports (despite
            the name, this is not a single file). Files are read in sorted
            filename order.
        ignore_cash_equivalents: When true, drop rows whose ``Ticker`` is in
            ``CASH_EQUIVALENTS`` and, if the file has a ``Security Type``
            column, rows whose security type is ``MONEY MARKET``
            (case- and whitespace-insensitive).

    Returns:
        A single DataFrame sorted by ``Trade Date`` with a fresh 0..n-1 index.
        ``Type`` and ``Ticker`` are stripped, upper-cased strings (missing
        values become ``""``); ``Amount USD`` and ``Quantity`` are numeric with
        missing or unparseable values replaced by 0.

    Raises:
        ValueError: If the directory contains no ``*.csv`` files (this also
            covers a directory that does not exist).
        KeyError: If a file lacks one of the columns ``Trade Date``, ``Type``,
            ``Ticker``, ``Amount USD`` or ``Quantity``.
    """
    transactions_dir = Path(csv_path)
    frames = []
    for csv_file in sorted(transactions_dir.glob("*.csv")):
        df = pd.read_csv(csv_file)

        # Normalize columns
        df["Trade Date"] = pd.to_datetime(df["Trade Date"])
        for text_col in ("Type", "Ticker"):
            # astype(str) keeps the .str accessor usable even when the column
            # was parsed as a non-string dtype (e.g. entirely empty).
            df[text_col] = df[text_col].fillna("").astype(str).str.strip().str.upper()
        for number_col in ("Amount USD", "Quantity"):
            df[number_col] = pd.to_numeric(df[number_col], errors="coerce").fillna(0)

        # Filter out cash equivalents
        if ignore_cash_equivalents:
            keep = ~df["Ticker"].isin(CASH_EQUIVALENTS)
            if "Security Type" in df.columns:
                # An all-empty "Security Type" column is read as float NaN, on
                # which a bare .str accessor raises; coerce to text first.
                security_type = (
                    df["Security Type"].fillna("").astype(str).str.strip().str.upper()
                )
                keep = keep & (security_type != "MONEY MARKET")
            df = df[keep]

        frames.append(df)

    if not frames:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(f"No CSV files found in {transactions_dir}")

    # A stable sort keeps same-day transactions in their original file order.
    transactions = (
        pd.concat(frames, ignore_index=True)
        .sort_values("Trade Date", kind="stable")
        .reset_index(drop=True)
    )
    return transactions

def consolidate_transaction_types(df: pd.DataFrame) -> pd.DataFrame:
    """Rewrite raw ``Type`` codes to the category names in ``TRANSACTION_TYPES``.

    Each code listed under a category is replaced by that category's
    upper-cased name (e.g. ``"LIQ"`` -> ``"SELL"``). The lookup ignores case and
    surrounding whitespace. Labels that match no category, and non-string
    values such as NaN, are left exactly as they were.

    Args:
        df: Frame with a ``Type`` column. It is not modified.

    Returns:
        A copy of ``df`` whose ``Type`` column holds the consolidated labels.
    """
    lookup = {
        raw_code: category.upper()
        for category, raw_codes in TRANSACTION_TYPES.items()
        for raw_code in raw_codes
    }

    def _canonical(label):
        # Non-strings (e.g. NaN) have no category; pass them through untouched.
        if not isinstance(label, str):
            return label
        # Unknown labels fall back to the original value, not the normalized key.
        return lookup.get(label.strip().upper(), label)

    result = df.copy()
    result["Type"] = result["Type"].map(_canonical)
    return result

In [10]:
# Directory holding the exported transaction CSVs. Set the TRANSACTIONS_DIR
# environment variable to run this notebook on another machine; the fallback
# is the original author's local checkout.
TRANSACTIONS_DIR = os.environ.get(
    "TRANSACTIONS_DIR", "/Users/bhargav/Git/investments/transactions"
)

# Keep the raw load under its own name so re-running the consolidation step
# does not require re-reading the CSVs.
transactions_raw = load_transactions(TRANSACTIONS_DIR)
df = consolidate_transaction_types(transactions_raw)

In [12]:
# Show the distinct "Type" labels present after consolidation.
df["Type"].unique()

array(['BUY', 'REINVEST', 'DIVIDEND', 'DEPOSIT', 'SPLIT', 'SELL',
       'INTEREST', 'WHT', 'CAP', 'CIL', 'ADR', 'DISTRIBUTION', 'FWT',
       'MER', 'EXCHANGE'], dtype=object)