Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,28 @@ I think I've got things running smoothly and fixed some major bugs, but feel fre

See more info at https://academicpages.github.io/

## Accounting Analytics Dashboard

This repository now includes a Python-based accounting analytics application located in `accounting_app/`. The tool ingests CSV or Excel exports from your accounting system, validates and cleans the data, generates financial statements, and exposes an interactive Streamlit dashboard complete with automated insights and export capabilities.

### Features

- Multi-file CSV/Excel ingestion with automatic sheet handling and column standardisation.
- Data cleaning (duplicate removal, missing value handling) and validation checks (e.g., debit vs credit balancing, outlier detection).
- Financial statement generation: Income Statement, Balance Sheet, Cash Flow Statement, and Trial Balance.
- Analytical tooling for trend, variance, ratio, budget vs actual, and aging analysis.
- Interactive dashboard with KPI cards, Plotly visualisations, drill-down filters, preset views by role, dark/light mode toggle, and PDF/CSV export.
- Privacy-friendly operation: uploaded files are processed in temporary folders that are deleted after ingestion.

### Getting Started

1. Create and activate a virtual environment (optional but recommended).
2. Install dependencies with `pip install -r requirements.txt`.
3. Launch the dashboard using `streamlit run accounting_app/dashboard.py`.
4. Upload one or more accounting files (see the templates in `sample_data/` for the expected format) and optionally a budget file to explore the reports.

Sample datasets are provided in `sample_data/dummy_transactions.csv` and `sample_data/dummy_budget.csv` for quick experimentation.

## To run locally (not on GitHub Pages, to serve on your own computer)

1. Clone the repository and make updates as detailed above
Expand Down
12 changes: 12 additions & 0 deletions accounting_app/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""Accounting analytics package exposing core utilities."""

from . import analytics, data_cleaner, data_loader, visualizations
from .accounting_engine import generate_statements

# Public API of the package: the submodules imported above plus the
# ``generate_statements`` entry point from ``accounting_engine``.
__all__ = [
"analytics",
"data_cleaner",
"data_loader",
"visualizations",
"generate_statements",
]
130 changes: 130 additions & 0 deletions accounting_app/accounting_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
"""Core accounting calculations and statement generation."""
from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime
from typing import Dict, Optional

import pandas as pd
from pandas import DataFrame

from . import utils


@dataclass
class StatementResult:
    """Container returned by every ``generate_*`` statement builder.

    Attributes:
        name: Human-readable statement title (e.g. ``"Balance Sheet"``).
        data: Tabular statement body.
        metadata: Free-form string key/value extras attached to the statement.
    """

    name: str
    data: DataFrame
    metadata: Dict[str, str]


def _filter_period(df: DataFrame, start_date: Optional[datetime], end_date: Optional[datetime]) -> DataFrame:
    """Restrict *df* to rows whose ``date`` falls inside an inclusive window.

    A falsy bound (such as ``None``) is skipped. When both bounds are skipped
    the input frame itself is returned, not a copy.
    """
    window = df
    if start_date:
        lower = pd.Timestamp(start_date)
        window = window[window["date"] >= lower]
    if end_date:
        upper = pd.Timestamp(end_date)
        window = window[window["date"] <= upper]
    return window


def _category_totals(df: DataFrame) -> DataFrame:
    """Sum ``amount`` per ``category``, largest total first.

    Returns an empty two-column frame (``category``, ``amount``) when the
    input has no ``category`` column.
    """
    if "category" in df.columns:
        summed = df.groupby("category")["amount"].sum()
        return summed.reset_index().sort_values(by="amount", ascending=False)
    return pd.DataFrame(columns=["category", "amount"])


def generate_income_statement(df: DataFrame, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None) -> StatementResult:
    """Build an income statement for the (optionally bounded) period.

    Args:
        df: Transactions with ``date``, ``amount`` and either ``category`` or
            ``account`` columns. The frame is never modified.
        start_date: Inclusive lower bound on ``date``; skipped when falsy.
        end_date: Inclusive upper bound on ``date``; skipped when falsy.

    Returns:
        A ``StatementResult`` whose ``data`` holds three rows (Revenue,
        Expense, Net Income) and whose ``metadata["detail"]`` is a JSON
        string of per-category totals split into Revenue / Expense / Other.
    """
    # Copy before touching columns: the period filter may hand back the
    # caller's own frame (no bounds) or a slice of it, and adding ``category``
    # to either would mutate caller data or trigger SettingWithCopyWarning.
    frame = _filter_period(df, start_date, end_date).copy()
    if "category" not in frame.columns:
        frame["category"] = frame["account"].map(utils.infer_category)

    category = frame["category"]
    is_revenue = category.str.contains("Revenue", case=False, na=False)
    is_expense = category.str.contains("Expense", case=False, na=False)
    revenue = frame[is_revenue]
    expense = frame[is_expense]
    # Positional masks instead of index-label membership, so frames with
    # repeated index labels (e.g. several files concatenated) still classify
    # every row exactly once.
    other = frame[~(is_revenue | is_expense)]

    revenue_total = revenue["amount"].sum()
    expense_total = expense["amount"].sum()
    # NOTE(review): net income is revenue minus expense, which presumes
    # expense amounts are stored as positive numbers - confirm with the loader.
    summary = pd.DataFrame(
        {
            "Category": ["Revenue", "Expense", "Net Income"],
            "Amount": [revenue_total, expense_total, revenue_total - expense_total],
        }
    )
    detail = pd.concat(
        {
            "Revenue": _category_totals(revenue),
            "Expense": _category_totals(expense),
            "Other": _category_totals(other),
        },
        names=["Section"],
    ).reset_index(level=0)
    return StatementResult(
        name="Income Statement",
        data=summary,
        metadata={"detail": detail.to_json(orient="records")},
    )


def generate_balance_sheet(df: DataFrame, as_of: Optional[datetime] = None) -> StatementResult:
    """Build a three-line balance sheet (Assets, Liabilities, Equity).

    Args:
        df: Transactions with ``date``, ``amount`` and either ``category`` or
            ``account`` columns; an optional ``balance`` column is used as-is.
            The frame is never modified.
        as_of: Inclusive cut-off on ``date``; skipped when falsy.

    Returns:
        A ``StatementResult`` whose ``data`` has ``Category`` / ``Amount``
        columns with one row per section.
    """
    frame = df
    if as_of:
        frame = frame[frame["date"] <= pd.Timestamp(as_of)]
    # Copy after filtering so the column assignments below land on an
    # independent frame rather than on a slice.
    frame = frame.copy()
    if "category" not in frame.columns:
        # Same fallback the income and cash-flow statements use.
        frame["category"] = frame["account"].map(utils.infer_category)
    if "balance" not in frame.columns:
        # NOTE(review): this is a running total over all rows in their current
        # order, later summed per category - confirm this is the intended
        # definition of a section balance.
        frame["balance"] = frame["amount"].cumsum()

    pivot = frame.groupby("category")["balance"].sum()
    labels = pivot.index.astype(str)
    sections = {"Assets": "Asset", "Liabilities": "Liability", "Equity": "Equity"}
    # Case-insensitive matching, consistent with the other statement builders.
    amounts = [pivot[labels.str.contains(pattern, case=False)].sum() for pattern in sections.values()]
    sheet = pd.DataFrame({"Category": list(sections), "Amount": amounts})
    # Assign the result back: a chained ``inplace=True`` fill writes to a
    # temporary and is a no-op under pandas Copy-on-Write.
    sheet["Amount"] = sheet["Amount"].fillna(0.0)
    return StatementResult("Balance Sheet", sheet, metadata={})


def generate_cash_flow(df: DataFrame, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None) -> StatementResult:
    """Build a simplified cash flow statement from category totals.

    Rows are bucketed by ``category`` text: Expense/Revenue count as
    operating, Asset as investing, Liability/Equity as financing.

    Args:
        df: Transactions with ``date``, ``amount`` and either ``category`` or
            ``account`` columns. The frame is never modified.
        start_date: Inclusive lower bound on ``date``; skipped when falsy.
        end_date: Inclusive upper bound on ``date``; skipped when falsy.

    Returns:
        A ``StatementResult`` with one row per activity plus the net change.
    """
    # ``sort_values`` returns a new frame, so the ``category`` assignment
    # below can no longer leak into the caller's data or hit a slice.
    frame = _filter_period(df, start_date, end_date).sort_values(by="date")
    if "category" not in frame.columns:
        frame["category"] = frame["account"].map(utils.infer_category)

    category = frame["category"]
    amount = frame["amount"]
    operating = amount[category.str.contains("Expense|Revenue", case=False, na=False)].sum()
    investing = amount[category.str.contains("Asset", case=False, na=False)].sum()
    financing = amount[category.str.contains("Liability|Equity", case=False, na=False)].sum()
    cash_change = operating + investing + financing
    cf = pd.DataFrame(
        {
            "Category": ["Operating Activities", "Investing Activities", "Financing Activities", "Net Change in Cash"],
            "Amount": [operating, investing, financing, cash_change],
        }
    )
    return StatementResult("Cash Flow Statement", cf, metadata={})


def generate_trial_balance(df: DataFrame, as_of: Optional[datetime] = None) -> StatementResult:
    """Summarise debits, credits and their difference for each account.

    Args:
        df: Transactions with ``date``, ``account``, ``debit`` and ``credit``.
        as_of: Inclusive cut-off on ``date``; skipped when falsy.

    Returns:
        A ``StatementResult`` with one row per account and a ``net`` column
        equal to ``debit - credit``.
    """
    ledger = df.copy()
    if as_of:
        cutoff = pd.Timestamp(as_of)
        ledger = ledger[ledger["date"] <= cutoff]
    per_account = ledger.groupby("account").agg({"debit": "sum", "credit": "sum"})
    per_account = per_account.fillna(0.0)
    per_account["net"] = per_account["debit"] - per_account["credit"]
    return StatementResult("Trial Balance", per_account.reset_index(), metadata={})


def generate_statements(df: DataFrame, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None) -> Dict[str, StatementResult]:
    """Produce all four statements in one call.

    The period statements (income, cash flow) use both bounds; the
    point-in-time statements (balance sheet, trial balance) use ``end_date``
    as their cut-off.

    Returns:
        Mapping with keys ``income_statement``, ``balance_sheet``,
        ``cash_flow`` and ``trial_balance``.
    """
    cutoff = end_date
    return {
        "income_statement": generate_income_statement(df, start_date, end_date),
        "balance_sheet": generate_balance_sheet(df, cutoff),
        "cash_flow": generate_cash_flow(df, start_date, end_date),
        "trial_balance": generate_trial_balance(df, cutoff),
    }


# Names exported by ``from accounting_engine import *``: the result container
# and the statement builders. The underscore-prefixed helpers stay private.
__all__ = [
"StatementResult",
"generate_income_statement",
"generate_balance_sheet",
"generate_cash_flow",
"generate_trial_balance",
"generate_statements",
]
134 changes: 134 additions & 0 deletions accounting_app/analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""Analytics layer providing ratios and trend insights."""
from __future__ import annotations

from typing import Dict, Optional

import numpy as np
import pandas as pd

from . import utils


def _ensure_period(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with a ``period`` column holding each row's month start.

    The input frame is left untouched.
    """
    stamped = df.copy()
    month = stamped["date"].dt.to_period("M")
    stamped["period"] = month.dt.to_timestamp()
    return stamped


def revenue_vs_expense_trend(df: pd.DataFrame) -> pd.DataFrame:
    """Monthly revenue, expense and net totals.

    Categories are matched by the substrings ``Revenue`` and ``Expense`` in
    the ``category`` column labels.

    Returns:
        A frame with columns ``period``, ``Revenue``, ``Expense`` and ``Net``
        (``Revenue - Expense``), one row per month.
    """
    monthly = _ensure_period(df).pivot_table(
        values="amount",
        index="period",
        columns="category",
        aggfunc="sum",
        fill_value=0,
    )
    trend = pd.DataFrame(
        {
            "Revenue": monthly.filter(regex="Revenue", axis=1).sum(axis=1),
            "Expense": monthly.filter(regex="Expense", axis=1).sum(axis=1),
        }
    )
    trend["Net"] = trend["Revenue"] - trend["Expense"]
    return trend.reset_index()


def month_over_month(df: pd.DataFrame, column: str = "amount") -> pd.DataFrame:
    """Month-over-month growth of the monthly sum of *column*.

    Growth itself is delegated to ``utils.rolling_growth``.

    Returns:
        A frame with ``period`` and ``mom_growth`` columns.
    """
    totals = _ensure_period(df).groupby("period")[column].sum()
    growth = utils.rolling_growth(totals)
    return growth.to_frame(name="mom_growth").reset_index()


def year_over_year(df: pd.DataFrame, column: str = "amount") -> pd.DataFrame:
    """Year-over-year fractional change of the yearly sum of *column*.

    Args:
        df: Frame with a datetime ``date`` column and the column to total.
            The frame is not modified.
        column: Name of the numeric column to aggregate.

    Returns:
        A frame with ``period`` (first day of each calendar year) and
        ``yoy_growth`` columns; the first year has no predecessor and is NaN.
    """
    # Group straight off the date column: the monthly ``period`` helper would
    # deep-copy the frame and add a column this function never reads.
    calendar_year = df["date"].dt.to_period("Y")
    yearly = df.groupby(calendar_year).agg({column: "sum"})
    yoy = yearly.pct_change().rename(columns={column: "yoy_growth"})
    yoy.index = yoy.index.to_timestamp()
    # The group key inherits the name "date" from the source column.
    return yoy.reset_index().rename(columns={"date": "period"})


def ratio_analysis(df: pd.DataFrame) -> Dict[str, float]:
    """Compute headline financial ratios from a transaction ledger.

    Balance-sheet figures use each account's most recent ``balance``; income
    figures use the sum of ``amount`` over the whole frame.

    Args:
        df: Ledger with ``date``, ``account``, ``category`` and ``amount``
            columns. If ``balance`` is absent, a per-account running total of
            ``amount`` in date order is used. The frame is not modified.

    Returns:
        Mapping with ``current_ratio``, ``quick_ratio``, ``debt_to_equity``,
        ``net_margin`` and ``gross_margin``. Assets and liabilities are not
        split into current/non-current, so the first two ratios use all of
        them. Division is delegated to ``utils.safe_divide``.
    """
    # Stable sort keeps same-day postings in their original order;
    # ``sort_values`` already returns a new frame, so the caller's is safe.
    ledger = df.sort_values("date", kind="stable")
    if "balance" not in ledger.columns:
        ledger["balance"] = ledger.groupby("account")["amount"].cumsum()

    # Closing position is the last row per account. Filtering on
    # ``date == max(date)`` would drop every account with no posting on the
    # final day and understate the balance sheet.
    closing = ledger.groupby("account").tail(1)

    def _closing_balance(column: str, pattern: str) -> float:
        # Sum closing balances whose text column matches, ignoring case.
        mask = closing[column].str.contains(pattern, case=False, na=False)
        return closing.loc[mask, "balance"].sum()

    def _period_amount(column: str, pattern: str) -> float:
        # Sum amounts over the whole ledger whose text column matches.
        mask = ledger[column].str.contains(pattern, case=False, na=False)
        return ledger.loc[mask, "amount"].sum()

    assets = _closing_balance("category", "Asset")
    liabilities = _closing_balance("category", "Liability")
    equity = _closing_balance("category", "Equity")
    inventory = _closing_balance("account", "Inventory")

    revenue = _period_amount("category", "Revenue")
    expense = _period_amount("category", "Expense")
    cost_of_goods = _period_amount("account", "COGS|Cost of Goods")
    net_income = revenue - expense

    return {
        "current_ratio": utils.safe_divide(assets, liabilities),
        "quick_ratio": utils.safe_divide(assets - inventory, liabilities),
        "debt_to_equity": utils.safe_divide(liabilities, equity),
        "net_margin": utils.safe_divide(net_income, revenue),
        "gross_margin": utils.safe_divide(revenue - cost_of_goods, revenue),
    }


def top_expenses(df: pd.DataFrame, limit: int = 10) -> pd.DataFrame:
    """Largest expense accounts by absolute total.

    Rows count as expenses when ``category`` contains ``Expense``
    (case-insensitive).

    Returns:
        Up to *limit* rows with ``account`` and ``total`` columns, biggest first.
    """
    is_expense = df["category"].str.contains("Expense", case=False, na=False)
    per_account = df[is_expense].groupby("account")["amount"].sum()
    ranked = per_account.abs().sort_values(ascending=False).head(limit)
    return ranked.reset_index().rename(columns={"amount": "total"})


def top_revenue(df: pd.DataFrame, limit: int = 10) -> pd.DataFrame:
    """Largest revenue accounts by summed amount.

    Rows count as revenue when ``category`` contains ``Revenue``
    (case-insensitive).

    Returns:
        Up to *limit* rows with ``account`` and ``total`` columns, biggest first.
    """
    is_revenue = df["category"].str.contains("Revenue", case=False, na=False)
    per_account = df[is_revenue].groupby("account")["amount"].sum()
    ranked = per_account.sort_values(ascending=False).head(limit)
    return ranked.reset_index().rename(columns={"amount": "total"})


def aging_analysis(df: pd.DataFrame, aging_column: str = "amount", as_of: Optional[pd.Timestamp] = None) -> pd.DataFrame:
    """Total *aging_column* by how many days each row has been outstanding.

    Args:
        df: Frame with a datetime ``date`` column (tz-naive or tz-aware) and
            the column to total. The frame is not modified.
        aging_column: Name of the numeric column to sum per bucket.
        as_of: Reference date to age against; anything ``pd.Timestamp``
            accepts. Defaults to the current UTC date.

    Returns:
        A frame with ``aging_bucket`` and *aging_column* columns holding one
        row per bucket (``0-30``, ``31-60``, ``61-90``, ``90+``), including
        buckets with no rows. Rows dated after the reference date fall outside
        every bucket and are left out.
    """
    frame = df.copy()
    reference = pd.Timestamp.now(tz="UTC") if as_of is None else pd.Timestamp(as_of)

    # Subtracting a tz-aware timestamp from a tz-naive column (or the reverse)
    # raises TypeError, so align the reference with the column first.
    column_tz = frame["date"].dt.tz
    if column_tz is None:
        if reference.tzinfo is not None:
            reference = reference.tz_convert(None)  # keep the UTC wall time, drop tz
    elif reference.tzinfo is None:
        reference = reference.tz_localize(column_tz)
    reference = reference.normalize()

    frame["days_outstanding"] = (reference - frame["date"]).dt.days
    # Left-closed edges chosen so the labels read literally: 0..30, 31..60,
    # 61..90, and anything beyond 90 days.
    bins = [0, 31, 61, 91, np.inf]
    labels = ["0-30", "31-60", "61-90", "90+"]
    frame["aging_bucket"] = pd.cut(frame["days_outstanding"], bins=bins, labels=labels, right=False)
    # observed=False keeps empty buckets in the report.
    return frame.groupby("aging_bucket", observed=False)[aging_column].sum().reset_index()


def budget_vs_actual(df: pd.DataFrame, budget_df: Optional[pd.DataFrame]) -> Optional[pd.DataFrame]:
    """Compare monthly actual totals with monthly budget totals.

    Args:
        df: Actual transactions with ``date`` and ``amount`` columns.
        budget_df: Budget lines with ``date`` and ``amount`` columns, or
            ``None`` when no budget was supplied.

    Returns:
        ``None`` when *budget_df* is ``None``; otherwise a frame with
        ``period``, ``amount_actual``, ``amount_budget``, ``variance``
        (actual minus budget) and ``variance_pct`` columns. Months present on
        only one side are kept with the missing side filled with 0.

    Raises:
        ValueError: If the budget frame has no ``date`` column.
    """
    if budget_df is None:
        return None

    actual_rows = _ensure_period(df)
    budget_rows = budget_df.copy()
    if "date" not in budget_rows.columns:
        raise ValueError("Budget data must contain a date column")
    budget_rows["date"] = utils.parse_dates(budget_rows["date"])
    budget_rows["period"] = budget_rows["date"].dt.to_period("M").dt.to_timestamp()

    actual = actual_rows.groupby("period")["amount"].sum().reset_index()
    plan = budget_rows.groupby("period")["amount"].sum().reset_index()
    merged = pd.merge(actual, plan, on="period", how="outer", suffixes=("_actual", "_budget"))
    merged = merged.fillna(0)
    merged["variance"] = merged["amount_actual"] - merged["amount_budget"]
    # A zero budget is swapped for NaN before it is used as the denominator.
    nonzero_budget = merged["amount_budget"].replace({0: np.nan})
    merged["variance_pct"] = utils.safe_divide(merged["variance"], nonzero_budget)
    return merged


def automated_insights(df: pd.DataFrame) -> Dict[str, str]:
    """Compose a short narrative about the most recent month.

    The text reports the latest month's net income, its change versus the
    preceding month (when one exists and its net is non-zero), and the
    largest expense account.

    Returns:
        A dict with a single ``summary`` key.
    """
    trend = revenue_vs_expense_trend(df)
    if trend.empty:
        return {"summary": "Insufficient data for insights."}

    latest = trend.iloc[-1]
    previous = trend.iloc[-2] if len(trend) > 1 else None

    sentences = [f"Net income for {latest['period']:%B %Y} was {utils.currency_format(latest['Net'])}."]
    if previous is not None and previous["Net"]:
        change = utils.safe_divide(latest["Net"] - previous["Net"], previous["Net"])
        sentences.append(f" This represents a {change:.1%} change from the prior month.")

    expense_trend = top_expenses(df, limit=3)
    if not expense_trend.empty:
        top_expense = expense_trend.iloc[0]
        sentences.append(
            f" Top expense category: {top_expense['account']} ({utils.currency_format(top_expense['total'])})."
        )
    return {"summary": "".join(sentences)}


# Names exported by ``from analytics import *``. The ``_ensure_period`` helper
# is deliberately left out.
__all__ = [
"revenue_vs_expense_trend",
"month_over_month",
"year_over_year",
"ratio_analysis",
"top_expenses",
"top_revenue",
"aging_analysis",
"budget_vs_actual",
"automated_insights",
]
Loading