In [8]:
import pandas as pd
from datetime import date, timedelta

In [85]:
# read csv
df = pd.read_csv('transactions_sample.csv')
print(df)

          date        type      category              asset ticker  quantity  \
0   2026-01-05      income        salary  Monthly Allowance    NaN         0   
1   2026-01-08     expense          food              Lunch    NaN         0   
2   2026-01-10     expense     transport                MRT    NaN         0   
3   2026-01-12  investment    investment          Apple Inc   AAPL         2   
4   2026-01-15  investment    investment        NVIDIA Corp   NVDA         3   
5   2026-02-01      income        salary  Monthly Allowance    NaN         0   
6   2026-02-06     expense  subscription            Spotify    NaN         0   
7   2026-02-10  investment    investment          Apple Inc   AAPL         1   
8   2026-02-18  investment    investment        NVIDIA Corp   NVDA        -1   
9   2026-03-01      income        salary  Monthly Allowance    NaN         0   
10  2026-03-04     expense          food             Dinner    NaN         0   
11  2026-03-10  investment    investment

In [53]:
# data validation

# convert date to datetime
df["date"] = pd.to_datetime(df["date"])

# check missing values
print(df.isnull().sum())

# check invalid buys
invalid_buys = df[
    (df["type"] == "investment") &
    (df["quantity"] > 0) &
    (df["amount"] > 0)
]

print(invalid_buys)

#check amount consistency

investment_rows = df[df["type"] == "investment"]
calculated = investment_rows["quantity"] * investment_rows["price"]
print(investment_rows["amount"] + calculated)

date        0
type        0
category    0
asset       0
ticker      8
quantity    0
price       0
amount      0
account     0
notes       0
dtype: int64
Empty DataFrame
Columns: [date, type, category, asset, ticker, quantity, price, amount, account, notes]
Index: []
3     0.0
4     0.0
7     0.0
8     0.0
11    0.0
dtype: float64


In [58]:
#functions

def get_investment_transactions(df):
    investments = df[df["type"] == 'investment']
    return investments

def get_expense(df):
    expenses = df[df["type"] == 'expense']
    return expenses

def get_income(df):
    income = df[df["type"] == 'income']
    return income
print(get_income(df)["amount"].sum())

def filter_by_date(df, start, end):
    filtered_df = df[
        (df["date"] >= start) &
        (df["date"] <= end)
    ]
    return filtered_df

# function calls
print(get_investment_transactions(df)["amount"].sum())
print(get_expense(df)["amount"].sum())
print(get_income(df)["amount"].sum())
print(filter_by_date(df, '2026-01-12', '2026-03-01'))

3600.0
-1010.0
-198.68
3600.0
        date        type      category              asset ticker  quantity  \
3 2026-01-12  investment    investment          Apple Inc   AAPL         2   
4 2026-01-15  investment    investment        NVIDIA Corp   NVDA         3   
5 2026-02-01      income        salary  Monthly Allowance    NaN         0   
6 2026-02-06     expense  subscription            Spotify    NaN         0   
7 2026-02-10  investment    investment          Apple Inc   AAPL         1   
8 2026-02-18  investment    investment        NVIDIA Corp   NVDA        -1   
9 2026-03-01      income        salary  Monthly Allowance    NaN         0   

   price   amount         account                 notes  
3  180.0  -360.00       brokerage      Initial AAPL buy  
4  150.0  -450.00       brokerage      Initial NVDA buy  
5    0.0  1200.00  bank-spendable    February allowance  
6    0.0    -9.98  bank-spendable  Monthly subscription  
7  175.0  -175.00       brokerage           Add to AAPL

In [None]:
# day 2 --  brainstorm

# investment DataFrame 
# asset, ticker, quantity, price, amount 
# ticker, qty held, capital invested, avg buy price

In [162]:
investment_df = get_investment_transactions(df)[["asset","ticker","price","quantity","amount"]]
print(investment_df.to_string(index=False))

# validate investments 
def validate(df):
    invalid_investments = df[
        (df["price"] * df["quantity"] != abs(df["amount"])) |
        (df["price"] <= 0)
    ]
    return invalid_investments
print(validate(investment_df))

      asset ticker  price  quantity  amount
  Apple Inc   AAPL  180.0         2  -360.0
NVIDIA Corp   NVDA  150.0         3  -450.0
  Apple Inc   AAPL  175.0         1  -175.0
NVIDIA Corp   NVDA  165.0        -1   165.0
  Apple Inc   AAPL  190.0         1  -190.0
         asset ticker  price  quantity  amount
8  NVIDIA Corp   NVDA  165.0        -1   165.0


In [157]:
# holdings
investment_df = investment_df.drop(columns=["price"])
group = investment_df.groupby("ticker").agg({
    "asset": "first",
    "quantity": "sum",
    "amount": "sum"
})

print(group)

              asset  quantity  amount
ticker                               
AAPL      Apple Inc         4  -725.0
NVDA    NVIDIA Corp         2  -285.0


In [158]:
# validate quantity cannot be negative 
def check_quantity(df):
    invalid_investments = df[
        df["quantity"] < 0
    ]
    return invalid_investments

print(check_quantity(group)) # return empty dataframe if no negative quantity

Empty DataFrame
Columns: [asset, quantity, amount]
Index: []


In [161]:
# handle scenario when quantity is zero 
holdings = group[group["quantity"] > 0].copy()

# validate average price is correct

holdings["avg_price"] = abs(holdings["amount"] / holdings["quantity"])
holdings["net_exposure"] = abs(holdings["amount"])
holdings = holdings.drop(columns=["amount"])

# capital invested
capital_invested = investment_df[investment_df["amount"] < 0].groupby("ticker").sum()
holdings["capital_invested"] = abs(capital_invested["amount"].reindex(holdings.index))

print(holdings)

              asset  quantity  avg_price  net_exposure  capital_invested
ticker                                                                  
AAPL      Apple Inc         4     181.25         725.0             725.0
NVDA    NVIDIA Corp         2     142.50         285.0             450.0
