# QBO Financial Anomaly Detection with the Deno Kernel

This notebook simulates QuickBooks Online (QBO)-style journal entries, engineers a
16-feature representation, and trains a TypeScript Isolation Forest to flag
suspicious bookkeeping activity.


## Getting set up

1. Install Deno **v1.45.4** or newer.
2. Register the kernel once with `deno jupyter --unstable --install`.
3. Launch JupyterLab and open this notebook with the **Deno** kernel (language: TypeScript).
4. Run the cells from top to bottom — utilities load in the first code cell, and the
   remaining cells generate data, train the forest, and summarize the top anomalies.


In [32]:

// Utility and data-generation helpers for QBO-style anomaly detection.
/** One observation: a flat list of numeric feature values. */
type FeatureVector = number[];

/** Source of pseudo-random draws used by the forest and the synthetic data generator. */
type RandomGenerator = {
  /** Returns a single pseudo-random draw. */
  uniform(): number;
  /** Returns a draw from a normal distribution with the given mean and standard deviation. */
  normal(mean: number, stdDev: number): number;
};

/** Tunable parameters accepted by the Isolation Forest trainer. */
interface IsolationForestOptions {
  /** Number of isolation trees to build. */
  nTrees: number;
  /** Number of rows sampled for each tree. */
  sampleSize: number;
  /** Maximum tree depth; optional. */
  heightLimit?: number;
  /** Seed passed to the pseudo-random generator. */
  seed: number;
}

/** Trained forest: exposes a score and a path length for a feature vector. */
interface IsolationForestModel {
  score(vector: FeatureVector): number;
  pathLength(vector: FeatureVector): number;
}

/**
 * Node of an isolation tree, discriminated by `kind`.
 * External nodes are leaves that only record how many samples reached them;
 * internal nodes split on one feature at `splitValue` and own two subtrees.
 */
type IsolationTreeNode =
  | { kind: 'external'; size: number }
  | {
      kind: 'internal';
      size: number;
      feature: number;
      splitValue: number;
      left: IsolationTreeNode;
      right: IsolationTreeNode;
    };

/** Side of the ledger a journal line posts to. */
type PostingType = 'Debit' | 'Credit';

/** Account categories used by the synthetic chart of accounts. */
type AccountType =
  | 'AccountsReceivable'
  | 'AccountsPayable'
  | 'Revenue'
  | 'Expense'
  | 'Bank'
  | 'CostOfGoodsSold'
  | 'OtherCurrentLiability'
  | 'OtherCurrentAsset'
  | 'PayrollExpense'
  | 'SalesTax'
  | 'Equity';

/** Kind of entity a journal line can reference; 'None' means no entity is attached. */
type QboEntityType = 'Customer' | 'Vendor' | 'Employee' | 'Project' | 'None';

/** Currencies the generator can assign to an entry. */
type CurrencyCode = 'USD' | 'CAD' | 'EUR' | 'GBP';

/** A single debit or credit line inside a journal entry. */
interface QboLineItem {
  accountType: AccountType;
  accountName: string;
  postingType: PostingType;
  amount: number;
  classRef?: string;
  locationRef?: string;
  entityType: QboEntityType;
  entityRef?: string;
}

/** A journal entry: header fields plus its lines and the ground-truth anomaly label. */
interface QboJournalEntry {
  docNumber: string;
  /** Transaction date stored as a string (the generator writes an ISO timestamp). */
  txnDate: string;
  currency: CurrencyCode;
  exchangeRate: number;
  memo: string;
  privateNote: string;
  locationRef?: string;
  source: 'billing' | 'payroll' | 'adjustment' | 'inventory';
  hasAttachments: boolean;
  isReclassification: boolean;
  lines: QboLineItem[];
  /** Ground-truth label assigned by the synthetic generator. */
  isAnomaly: boolean;
}

/** Knobs for the synthetic journal generator. */
interface QboSyntheticOptions {
  seed: number;
  /** Number of journal entries to generate. */
  entries: number;
  /** Probability that an entry is drawn as an anomaly candidate. */
  anomalyRatio: number;
  /** Document sequence number assigned to the first entry. */
  startSequence: number;
  fiscalYear: number;
  /** Probability that an entry goes through the foreign-currency branch. */
  foreignCurrencyShare: number;
}

/** Generated entries together with their feature matrix, column names, labels and the options used. */
interface QboFeatureDataset {
  entries: QboJournalEntry[];
  featureMatrix: FeatureVector[];
  featureNames: string[];
  labels: boolean[];
  options: QboSyntheticOptions;
}

/** Result of the PCA projection: per-row coordinates, the component vectors and the feature means. */
interface PcaProjection {
  coordinates: FeatureVector[];
  components: FeatureVector[];
  mean: FeatureVector;
}

/** Bundle of helper functions published on `globalThis` for use by later notebook cells. */
interface AnomalyUtils {
  trainIsolationForest: (
    data: FeatureVector[],
    options?: Partial<IsolationForestOptions>,
  ) => IsolationForestModel;
  createQboFeatureDataset: (
    options?: Partial<QboSyntheticOptions>,
  ) => QboFeatureDataset;
  projectTo2D: (data: FeatureVector[]) => PcaProjection;
}

/**
 * Builds a deterministic pseudo-random generator from a numeric seed.
 *
 * Each draw advances a 32-bit state by a fixed increment and scrambles it with
 * `Math.imul` / xor-shift steps (a mulberry32-style mixer), then scales the
 * unsigned result into [0, 1).
 *
 * @param seed - Any number; it is coerced to an unsigned 32-bit integer.
 * @returns A generator whose `uniform()` never returns exactly 0 and whose
 *   `normal(mean, stdDev)` applies the Box-Muller transform (two draws per call).
 */
function createRandom(seed: number): RandomGenerator {
  let state = seed >>> 0;
  const next = () => {
    // Wrap to uint32 on every step. An unbounded `state += …` keeps growing as a
    // double and silently drops low bits once it passes 2^53, degrading the stream.
    // The bitwise mixer below only ever saw the low 32 bits, so output is unchanged.
    state = (state + 0x6d2b79f5) >>> 0;
    let t = Math.imul(state ^ (state >>> 15), 1 | state);
    t ^= t + Math.imul(t ^ (t >>> 7), 61 | t);
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  };
  return {
    uniform() {
      // Replace an exact 0 so callers can safely take logarithms or scale ranges.
      return next() || 1e-12;
    },
    normal(mean: number, stdDev: number) {
      // Box-Muller: u1 must be non-zero because of the logarithm.
      const u1 = next() || 1e-12;
      const u2 = next();
      const magnitude = Math.sqrt(-2 * Math.log(u1));
      const angle = 2 * Math.PI * u2;
      const z0 = magnitude * Math.cos(angle);
      return mean + stdDev * z0;
    },
  };
}

// Synthetic chart of accounts. `defaultPosting` is the side used when the
// generator picks an account for a debit or a credit line.
const accountCatalog: ReadonlyArray<{ name: string; type: AccountType; defaultPosting: PostingType }> = [
  { name: 'Accounts Receivable', type: 'AccountsReceivable', defaultPosting: 'Debit' },
  { name: 'Accounts Payable', type: 'AccountsPayable', defaultPosting: 'Credit' },
  { name: 'Checking', type: 'Bank', defaultPosting: 'Credit' },
  { name: 'Savings', type: 'Bank', defaultPosting: 'Credit' },
  { name: 'Sales Revenue', type: 'Revenue', defaultPosting: 'Credit' },
  { name: 'Deferred Revenue', type: 'OtherCurrentLiability', defaultPosting: 'Credit' },
  { name: 'Cost of Goods Sold', type: 'CostOfGoodsSold', defaultPosting: 'Debit' },
  { name: 'Operating Expenses', type: 'Expense', defaultPosting: 'Debit' },
  { name: 'Marketing Expense', type: 'Expense', defaultPosting: 'Debit' },
  { name: 'Payroll Expense', type: 'PayrollExpense', defaultPosting: 'Debit' },
  { name: 'Inventory Asset', type: 'OtherCurrentAsset', defaultPosting: 'Debit' },
  { name: 'Sales Tax Payable', type: 'SalesTax', defaultPosting: 'Credit' },
  { name: 'Retained Earnings', type: 'Equity', defaultPosting: 'Credit' },
];

// Values sampled for the optional class and location references on lines/entries.
const classCatalog = ['Fulfillment', 'Enterprise', 'Self-Serve', 'R&D', 'Marketing'];
const locationCatalog = ['San Francisco', 'New York', 'EMEA Hub', 'Remote'];

// Entity names per entity type; 'None' is excluded because it carries no reference.
const entityCatalog: Record<Exclude<QboEntityType, 'None'>, string[]> = {
  Customer: ['Acme Co', 'Northwind', 'Globex', 'Initech'],
  Vendor: ['Paper Partners', 'Compute Cloud', 'Supply Depot'],
  Employee: ['Taylor Reed', 'Jordan Diaz', 'Priya Shah'],
  Project: ['Q1 Launch', 'Migration Wave', 'Fiscal Close'],
};

// Generator defaults; any field can be overridden through createQboFeatureDataset options.
const qboDefaults: QboSyntheticOptions = {
  seed: 498,
  entries: 480,
  anomalyRatio: 0.14,
  startSequence: 4200,
  fiscalYear: 2025,
  foreignCurrencyShare: 0.1,
};

// Column names of the feature matrix, in the same order computeFeatureVector emits values.
const QBO_FEATURE_NAMES = [
  'doc_number_length',
  'doc_sequence_gap',
  'day_of_month',
  'month_index',
  'is_month_end',
  'line_count',
  'unique_accounts',
  'total_debit',
  'imbalance',
  'avg_line_amount',
  'std_line_amount',
  'ar_line_ratio',
  'ap_line_ratio',
  'class_coverage',
  'memo_length',
  'foreign_currency_flag',
];

/**
 * Trains an Isolation Forest on the given feature matrix.
 *
 * @param data - Rows to train on; the first row determines the feature count.
 * @param options - Optional overrides. Missing or `undefined` fields fall back to
 *   110 trees, a sample size of 180 (capped at the row count) and seed 90210.
 *   When `heightLimit` is omitted it is ceil(log2(sampleSize)), with a floor of 1.
 * @returns A model exposing the mean path length of a vector and its anomaly
 *   score `2^(-meanPath / c)`, where `c` is the average path length for the sample size.
 * @throws Error when `data` is empty, `nTrees` is not a positive finite number,
 *   or `sampleSize` is below 1.
 */
function trainIsolationForestImpl(
  data: FeatureVector[],
  options: Partial<IsolationForestOptions> = {},
): IsolationForestModel {
  if (data.length === 0) {
    throw new Error('Isolation Forest requires at least one sample.');
  }

  const dims = data[0].length;

  // Resolve each option with `??` rather than object spread: a caller passing
  // `{ sampleSize: undefined }` would otherwise overwrite the default with undefined
  // and propagate NaN through sampling and scoring.
  const nTrees = options.nTrees ?? 110;
  const requestedSampleSize = options.sampleSize ?? 180;
  const seed = options.seed ?? 90210;

  if (!Number.isFinite(nTrees) || nTrees <= 0) {
    // Zero trees would make every mean path length 0 / 0.
    throw new Error('Isolation Forest requires a positive, finite nTrees.');
  }
  if (!(requestedSampleSize >= 1)) {
    // Also rejects NaN, which fails every comparison.
    throw new Error('Isolation Forest requires sampleSize of at least 1.');
  }

  const sampleSize = Math.min(requestedSampleSize, data.length);
  const heightLimit = options.heightLimit ?? Math.ceil(Math.log2(Math.max(sampleSize, 2)));
  const rng = createRandom(seed);

  const trees: IsolationTreeNode[] = [];
  for (let t = 0; t < nTrees; t += 1) {
    const sample = randomSample(data, sampleSize, rng);
    trees.push(buildTree(sample, 0, heightLimit, dims, rng));
  }

  // Normalisation constant for the score.
  const c = averagePathLength(sampleSize);

  const meanPathLength = (vector: FeatureVector) =>
    trees.reduce((sum, tree) => sum + traversePath(vector, tree, 0), 0) / trees.length;

  return {
    score(vector: FeatureVector) {
      if (c === 0) {
        // A one-row sample carries no isolation information (path and c are both 0);
        // report the neutral score instead of NaN.
        return 0.5;
      }
      const path = meanPathLength(vector);
      return Math.pow(2, -path / c);
    },
    pathLength(vector: FeatureVector) {
      return meanPathLength(vector);
    },
  };
}

/**
 * Recursively grows one isolation tree from a sample.
 *
 * Growth stops (returning an external node that records the sample size) when the
 * height limit is reached, the sample has at most one row, no feature has more
 * than one distinct value, or a split would leave one side empty.
 *
 * @param sample - Rows that reached this node.
 * @param height - Depth of this node (0 at the root).
 * @param heightLimit - Maximum depth at which internal nodes may be created.
 * @param dims - Number of features per row.
 * @param rng - Random source used to choose the feature and the split value.
 * @returns The root of the subtree built from `sample`.
 */
function buildTree(
  sample: FeatureVector[],
  height: number,
  heightLimit: number,
  dims: number,
  rng: RandomGenerator,
): IsolationTreeNode {
  const leaf = (): IsolationTreeNode => ({ kind: 'external', size: sample.length });

  if (height >= heightLimit || sample.length <= 1) {
    return leaf();
  }

  const feature = pickSplittableFeature(sample, dims, rng);
  if (feature === null) {
    return leaf();
  }

  // Fold the range pairwise instead of `Math.min(...values)`: spreading a large
  // sample as call arguments can exceed the engine's argument limit and throw.
  // Math.min/Math.max keep the same NaN-propagating semantics as the spread form.
  let min = Infinity;
  let max = -Infinity;
  for (const vector of sample) {
    min = Math.min(min, vector[feature]);
    max = Math.max(max, vector[feature]);
  }
  if (min === max) {
    return leaf();
  }

  const splitValue = min + rng.uniform() * (max - min);
  const left: FeatureVector[] = [];
  const right: FeatureVector[] = [];
  for (const vector of sample) {
    if (vector[feature] < splitValue) {
      left.push(vector);
    } else {
      right.push(vector);
    }
  }

  // Rounding can push the split onto the minimum, leaving one side empty.
  if (left.length === 0 || right.length === 0) {
    return leaf();
  }

  return {
    kind: 'internal',
    size: sample.length,
    feature,
    splitValue,
    left: buildTree(left, height + 1, heightLimit, dims, rng),
    right: buildTree(right, height + 1, heightLimit, dims, rng),
  };
}

/**
 * Picks, in random order, the first feature whose values are not all identical
 * across the sample.
 *
 * @param sample - Rows to inspect.
 * @param dims - Number of features per row.
 * @param rng - Random source used to shuffle the feature order.
 * @returns The chosen feature index, or `null` when the sample is empty or every
 *   feature is constant.
 */
function pickSplittableFeature(
  sample: FeatureVector[],
  dims: number,
  rng: RandomGenerator,
): number | null {
  // An empty sample has nothing to split on. (The previous min/max comparison
  // saw Infinity !== -Infinity and reported the first feature as splittable.)
  if (sample.length === 0) {
    return null;
  }

  const order = Array.from({ length: dims }, (_, index) => index);
  shuffle(order, rng);

  const firstRow = sample[0];
  for (const dim of order) {
    const reference = firstRow[dim];
    // `some` stops at the first differing value and avoids spreading the whole
    // column into Math.min/Math.max, which can overflow the argument limit.
    // NaN !== NaN, so a column containing NaN still counts as splittable, as before.
    if (sample.some((vector) => vector[dim] !== reference)) {
      return dim;
    }
  }
  return null;
}

/**
 * Walks a vector down an isolation tree and returns its path length.
 *
 * Each internal node adds one step; on reaching an external node the result is
 * the accumulated depth plus the average path length for that node's sample size.
 *
 * @param vector - Feature vector to route through the tree.
 * @param node - Node to start from.
 * @param depth - Depth already accumulated before `node`.
 */
function traversePath(
  vector: FeatureVector,
  node: IsolationTreeNode,
  depth: number,
): number {
  let current = node;
  let steps = depth;
  while (current.kind === 'internal') {
    const goesLeft = vector[current.feature] < current.splitValue;
    current = goesLeft ? current.left : current.right;
    steps += 1;
  }
  return steps + averagePathLength(current.size);
}

/**
 * Average path length term for a node holding `size` samples:
 * `2 * H(size - 1) - 2 * (size - 1) / size`, and 0 when `size <= 1`.
 */
function averagePathLength(size: number): number {
  if (size <= 1) {
    return 0;
  }
  const harmonicPart = 2 * harmonic(size - 1);
  const correction = (2 * (size - 1)) / size;
  return harmonicPart - correction;
}

/** Sum of 1/i for i = 1, 2, … while i <= n (0 when n < 1). */
function harmonic(n: number): number {
  let total = 0;
  let term = 1;
  while (term <= n) {
    total += 1 / term;
    term += 1;
  }
  return total;
}

/**
 * Draws `sampleSize` rows without replacement by shuffling the row indices.
 * When the request covers the whole dataset, a shallow copy is returned and the
 * random source is not consumed.
 */
function randomSample(
  data: FeatureVector[],
  sampleSize: number,
  rng: RandomGenerator,
): FeatureVector[] {
  if (sampleSize >= data.length) {
    return data.slice();
  }
  const order = [...data.keys()];
  shuffle(order, rng);
  const picked: FeatureVector[] = [];
  for (const position of order.slice(0, sampleSize)) {
    picked.push(data[position]);
  }
  return picked;
}

/** In-place Fisher-Yates shuffle, walking from the last slot down to index 1. */
function shuffle(values: number[], rng: RandomGenerator): void {
  let upper = values.length;
  while (upper > 1) {
    upper -= 1;
    const pick = Math.floor(rng.uniform() * (upper + 1));
    const held = values[upper];
    values[upper] = values[pick];
    values[pick] = held;
  }
}

/**
 * Generates synthetic journal entries and derives the feature matrix and labels.
 *
 * @param options - Overrides merged on top of `qboDefaults`.
 * @returns Entries, one feature vector and one label per entry, a copy of the
 *   feature names, and the resolved options.
 */
function createQboFeatureDatasetImpl(
  options: Partial<QboSyntheticOptions> = {},
): QboFeatureDataset {
  const resolved = { ...qboDefaults, ...options };
  const journal = generateQboJournalEntries(resolved);

  const featureMatrix: FeatureVector[] = [];
  const labels: boolean[] = [];
  journal.forEach((entry, position) => {
    featureMatrix.push(computeFeatureVector(entry, position, resolved));
    labels.push(entry.isAnomaly);
  });

  return {
    entries: journal,
    featureMatrix,
    featureNames: QBO_FEATURE_NAMES.slice(),
    labels,
    options: resolved,
  };
}

/**
 * Generates a seeded sequence of synthetic journal entries.
 *
 * Each entry is drawn as an anomaly candidate with probability `anomalyRatio`
 * and, if so, assigned one of four scenarios:
 *   0 - document number reuses the previous sequence value,
 *   1 - document number jumps three ahead,
 *   2 - forced through the currency branch, with an understated credit line (see buildLines),
 *   3 - manual adjustment memo/source with a large extra line pair (see buildLines).
 *
 * @param config - Fully resolved generator options.
 * @returns `config.entries` journal entries with ground-truth `isAnomaly` labels.
 */
function generateQboJournalEntries(config: QboSyntheticOptions): QboJournalEntry[] {
  const rng = createRandom(config.seed);
  const entries: QboJournalEntry[] = [];
  const buildDocNumber = (sequence: number) =>
    `JE-${config.fiscalYear}-${sequence.toString().padStart(5, '0')}`;

  // Hoisted out of the loop; typed so `pickOne` yields the literal unions the
  // entry fields require instead of plain `string`.
  const currencyCodes: ReadonlyArray<CurrencyCode> = ['USD', 'CAD', 'EUR', 'GBP'];
  const usdRates: Record<CurrencyCode, number> = { USD: 1, CAD: 0.75, EUR: 1.07, GBP: 1.27 };
  const randomSources: ReadonlyArray<QboJournalEntry['source']> = [
    'billing',
    'payroll',
    'inventory',
    'adjustment',
  ];

  for (let i = 0; i < config.entries; i += 1) {
    const candidateAnomaly = rng.uniform() < config.anomalyRatio;
    const scenario = candidateAnomaly ? Math.floor(rng.uniform() * 4) : -1;

    // Dates cycle through the months and through days 1..28.
    const monthIndex = i % 12;
    const day = (i * 7) % 28 + 1;
    const txnDate = new Date(Date.UTC(config.fiscalYear, monthIndex, day));

    let docSequence = config.startSequence + i;
    if (scenario === 0 && i > 0) {
      docSequence = config.startSequence + i - 1;
    } else if (scenario === 1) {
      docSequence = config.startSequence + i + 3;
    }

    let currency: CurrencyCode = 'USD';
    let exchangeRate = 1;
    // The uniform draw happens first on purpose so the random stream does not
    // depend on the scenario.
    if (rng.uniform() < config.foreignCurrencyShare || scenario === 2) {
      currency = pickOne(currencyCodes, rng);
      exchangeRate = usdRates[currency];
    }

    const locationRef = rng.uniform() < 0.6 ? pickOne(locationCatalog, rng) : undefined;
    const memo = scenario === 3 ? 'Manual adjustment – investigate immediately' : sampleMemo(rng);
    const privateNote = rng.uniform() < 0.4 ? samplePrivateNote(rng) : '';
    const source: QboJournalEntry['source'] =
      scenario === 3 ? 'adjustment' : pickOne(randomSources, rng);
    const hasAttachments = rng.uniform() < 0.35;
    const isReclassification = scenario === 3;

    const lines = buildLines({ rng, scenario, currency, exchangeRate, locationRef });

    // Scenario 0 needs a previous entry to collide with. For the very first entry
    // nothing anomalous is injected, so it must not carry a positive label.
    const isAnomaly = candidateAnomaly && !(scenario === 0 && i === 0);

    entries.push({
      docNumber: buildDocNumber(docSequence),
      txnDate: txnDate.toISOString(),
      currency,
      exchangeRate,
      memo,
      privateNote,
      locationRef,
      source,
      hasAttachments,
      isReclassification,
      lines,
      isAnomaly,
    });
  }

  return entries;
}

/**
 * Builds the debit/credit lines for one journal entry.
 *
 * Always emits one debit and one credit line for the same base amount. With
 * probability 0.35 a sales-tax credit is added and the debit grows by the same
 * amount. Scenario 2 scales the credit line by 0.72; scenario 3 adds a large
 * extra amount to the debit with a matching 'Deferred Revenue' credit line.
 *
 * @param args - Random source, anomaly scenario and the entry's location
 *   (currency and exchange rate are accepted but not read here).
 */
function buildLines(args: {
  rng: RandomGenerator;
  scenario: number;
  currency: CurrencyCode;
  exchangeRate: number;
  locationRef?: string;
}): QboLineItem[] {
  const { rng, scenario, locationRef } = args;

  // Draw order below is fixed: it determines the seeded output.
  const baseAmount = Math.max(45, 250 + rng.normal(0, 40));
  const debitAccount = pickByPosting('Debit', rng);
  const creditAccount = pickByPosting('Credit', rng);

  let classRef: string | undefined;
  if (rng.uniform() < 0.65) {
    classRef = pickOne(classCatalog, rng);
  }
  const debitEntity = pickEntity(debitAccount.type, rng);
  const creditEntity = pickEntity(creditAccount.type, rng);
  const creditKeepsClass = rng.uniform() < 0.4;

  const debitLine: QboLineItem = {
    accountType: debitAccount.type,
    accountName: debitAccount.name,
    postingType: 'Debit',
    amount: baseAmount,
    classRef,
    locationRef,
    entityType: debitEntity ? debitEntity.type : 'None',
    entityRef: debitEntity ? debitEntity.ref : undefined,
  };

  const creditLine: QboLineItem = {
    accountType: creditAccount.type,
    accountName: creditAccount.name,
    postingType: 'Credit',
    amount: baseAmount,
    classRef: creditKeepsClass ? classRef : undefined,
    locationRef,
    entityType: creditEntity ? creditEntity.type : 'None',
    entityRef: creditEntity ? creditEntity.ref : undefined,
  };

  const lines: QboLineItem[] = [debitLine, creditLine];

  const includesTax = rng.uniform() < 0.35;
  if (includesTax) {
    const taxAmount = Math.max(5, baseAmount * 0.08 + rng.normal(0, 2));
    // Grow the debit by the tax so the entry still balances.
    debitLine.amount += taxAmount;
    lines.push({
      accountType: 'SalesTax',
      accountName: 'Sales Tax Payable',
      postingType: 'Credit',
      amount: taxAmount,
      classRef,
      locationRef,
      entityType: 'None',
    });
  }

  switch (scenario) {
    case 2:
      // Understate the credit side to create an imbalance.
      creditLine.amount *= 0.72;
      break;
    case 3: {
      const extra = Math.max(500, baseAmount * 8);
      debitLine.amount += extra;
      lines.push({
        accountType: 'OtherCurrentLiability',
        accountName: 'Deferred Revenue',
        postingType: 'Credit',
        amount: extra,
        classRef,
        locationRef,
        entityType: 'None',
      });
      break;
    }
    default:
      break;
  }

  return lines;
}

/** Picks a random catalog account whose default posting side matches `posting`. */
function pickByPosting(posting: PostingType, rng: RandomGenerator) {
  const matching: Array<(typeof accountCatalog)[number]> = [];
  for (const account of accountCatalog) {
    if (account.defaultPosting === posting) {
      matching.push(account);
    }
  }
  return pickOne(matching, rng);
}

/** Returns one element of `values`, chosen by a single uniform draw from `rng`. */
function pickOne<T>(values: ReadonlyArray<T>, rng: RandomGenerator): T {
  const draw = rng.uniform();
  const position = Math.floor(draw * values.length);
  return values[position];
}

/**
 * Chooses the entity attached to a line based on its account type.
 * Receivable, payable and payroll accounts always get a customer, vendor or
 * employee respectively; any other account gets a project with probability 0.2
 * and otherwise no entity.
 */
function pickEntity(accountType: AccountType, rng: RandomGenerator): { type: QboEntityType; ref: string } | undefined {
  switch (accountType) {
    case 'AccountsReceivable':
      return { type: 'Customer', ref: pickOne(entityCatalog.Customer, rng) };
    case 'AccountsPayable':
      return { type: 'Vendor', ref: pickOne(entityCatalog.Vendor, rng) };
    case 'PayrollExpense':
      return { type: 'Employee', ref: pickOne(entityCatalog.Employee, rng) };
    default: {
      const attachProject = rng.uniform() < 0.2;
      return attachProject
        ? { type: 'Project', ref: pickOne(entityCatalog.Project, rng) }
        : undefined;
    }
  }
}

/** Returns a random memo string for a non-adjustment entry. */
function sampleMemo(rng: RandomGenerator): string {
  return pickOne(
    [
      'Monthly accrual entry',
      'Reclassify marketing spend',
      'Inventory adjustment',
      'Payroll true-up',
      'Subscription revenue recognition',
    ],
    rng,
  );
}

/** Returns a random private-note string. */
function samplePrivateNote(rng: RandomGenerator): string {
  return pickOne(
    [
      'Reviewed by controllership',
      'Pending approval from CFO',
      'Attach receipt before close',
      'Cross-check with vendor statement',
    ],
    rng,
  );
}

/**
 * Converts one journal entry into its numeric feature vector.
 *
 * The values are emitted in the order listed in `QBO_FEATURE_NAMES`. Amounts are
 * normalised with the entry's exchange rate before being aggregated.
 *
 * @param entry - Journal entry to featurise.
 * @param index - Position of the entry in the dataset; `startSequence + index`
 *   is the document sequence the entry is expected to carry.
 * @param options - Generator options (only `startSequence` is read).
 * @returns A 16-element feature vector. An entry without lines yields 0 for the
 *   per-line averages and ratios instead of NaN.
 */
function computeFeatureVector(
  entry: QboJournalEntry,
  index: number,
  options: QboSyntheticOptions,
): FeatureVector {
  const expectedSequence = options.startSequence + index;
  // Trailing digits of the document number; fall back to the expected value
  // (gap of 0) when the number has no numeric suffix.
  const docDigitsMatch = entry.docNumber.match(/(\d+)$/);
  const docDigits = docDigitsMatch ? Number(docDigitsMatch[1]) : expectedSequence;
  const txn = new Date(entry.txnDate);

  const lineCount = entry.lines.length;
  // Guard the per-line averages the same way ratioByType guards its own division:
  // dividing by zero lines would put NaN into the matrix and poison every tree split.
  const divisor = Math.max(lineCount, 1);

  const uniqueAccounts = new Set(entry.lines.map((line) => line.accountName)).size;
  const debitTotal = sumLines(entry.lines, 'Debit', entry.exchangeRate, entry.currency);
  const creditTotal = sumLines(entry.lines, 'Credit', entry.exchangeRate, entry.currency);
  const imbalance = Math.abs(debitTotal - creditTotal);
  const amounts = entry.lines.map((line) => normalizeAmount(line.amount, entry.exchangeRate, entry.currency));
  const avgLine = amounts.reduce((sum, value) => sum + value, 0) / divisor;
  // Population variance (divides by the line count, not count - 1).
  const variance = amounts.reduce((sum, value) => sum + (value - avgLine) ** 2, 0) / divisor;
  const stdLine = Math.sqrt(variance);
  const arLineRatio = ratioByType(entry.lines, 'AccountsReceivable');
  const apLineRatio = ratioByType(entry.lines, 'AccountsPayable');
  const classCoverage = entry.lines.filter((line) => line.classRef).length / divisor;

  return [
    entry.docNumber.length,
    docDigits - expectedSequence,
    txn.getUTCDate(),
    txn.getUTCMonth() + 1,
    // Heuristic month-end flag: any day from the 28th onward, not calendar-aware.
    txn.getUTCDate() >= 28 ? 1 : 0,
    lineCount,
    uniqueAccounts,
    debitTotal,
    imbalance,
    avgLine,
    stdLine,
    arLineRatio,
    apLineRatio,
    classCoverage,
    entry.memo.length,
    entry.currency === 'USD' ? 0 : 1,
  ];
}

/** Adds up the normalised amounts of all lines posted on the given side. */
function sumLines(
  lines: QboLineItem[],
  postingType: PostingType,
  exchangeRate: number,
  currency: CurrencyCode,
): number {
  let total = 0;
  for (const line of lines) {
    if (line.postingType !== postingType) {
      continue;
    }
    total += normalizeAmount(line.amount, exchangeRate, currency);
  }
  return total;
}

/** Multiplies a non-USD amount by the exchange rate; USD amounts pass through unchanged. */
function normalizeAmount(amount: number, exchangeRate: number, currency: CurrencyCode): number {
  return currency === 'USD' ? amount : amount * exchangeRate;
}

/** Fraction of lines whose account type equals `type`; 0 for an empty list. */
function ratioByType(lines: QboLineItem[], type: AccountType): number {
  let matches = 0;
  for (const line of lines) {
    if (line.accountType === type) {
      matches += 1;
    }
  }
  return lines.length === 0 ? 0 : matches / lines.length;
}

/**
 * Projects rows onto their first two principal components.
 *
 * The data is mean-centred, its covariance matrix is computed, and each
 * component is found by power iteration (up to 200 steps) with Gram-Schmidt
 * orthogonalisation against earlier components and deflation of the working matrix.
 *
 * @param data - Rows to project; the first row determines the dimension.
 * @param seed - Seed for the power-iteration start vectors. A fixed default keeps
 *   the projection (including component signs) identical between runs.
 * @returns Per-row 2-D coordinates, the two component vectors and the feature means.
 *   Empty input yields empty arrays.
 */
function projectTo2DImpl(data: FeatureVector[], seed = 1729): PcaProjection {
  if (data.length === 0) {
    return { coordinates: [], components: [], mean: [] };
  }
  const dim = data[0].length;
  const mean = new Array<number>(dim).fill(0);
  for (const row of data) {
    for (let i = 0; i < dim; i += 1) {
      mean[i] += row[i];
    }
  }
  for (let i = 0; i < dim; i += 1) {
    mean[i] /= data.length;
  }

  const centered = data.map((row) => row.map((value, index) => value - mean[index]));
  const covariance = computeCovariance(centered);

  const components: FeatureVector[] = [];
  let workingCovariance = covariance.map((row) => row.slice());

  // Seeded start vectors instead of Math.random(): the rest of the notebook is
  // seeded, and random starts made the plot mirror unpredictably between runs.
  const rng = createRandom(seed);

  for (let componentIndex = 0; componentIndex < Math.min(2, dim); componentIndex += 1) {
    let vector = normalizeVector(Array.from({ length: dim }, () => rng.uniform() - 0.5));

    for (let iteration = 0; iteration < 200; iteration += 1) {
      let next = matVecMul(workingCovariance, vector);

      // Remove any drift back toward components that were already extracted.
      for (const prev of components) {
        const dotProd = dot(next, prev);
        for (let i = 0; i < dim; i += 1) {
          next[i] -= dotProd * prev[i];
        }
      }

      const norm = Math.sqrt(next.reduce((sum, value) => sum + value * value, 0));
      if (!Number.isFinite(norm) || norm === 0) {
        // Degenerate direction (e.g. no remaining variance): keep the last vector.
        break;
      }
      next = next.map((value) => value / norm);
      vector = next;
    }

    const eigenvalue = dot(vector, matVecMul(covariance, vector));
    components.push(vector);
    workingCovariance = deflateCovariance(workingCovariance, vector, eigenvalue);
  }

  // Only reached when dim < 2. Pad with zero vectors of length `dim` so callers
  // always get two coordinates; the previous padding wrote index 1 of a
  // one-element vector and produced a component longer than the data.
  while (components.length < 2) {
    components.push(new Array<number>(dim).fill(0));
  }

  const coordinates = centered.map((row) =>
    components.map((component) => dot(row, component)),
  );

  return { coordinates, components, mean };
}

/**
 * Covariance matrix of already-centred rows: sums of pairwise products scaled by
 * 1 / max(1, rowCount - 1). The dimension is taken from the first row.
 */
function computeCovariance(data: FeatureVector[]): number[][] {
  const dim = data[0].length;
  const factor = 1 / Math.max(1, data.length - 1);
  const result: number[][] = Array.from({ length: dim }, () => new Array<number>(dim).fill(0));

  // Accumulate the upper triangle only; the matrix is symmetric.
  data.forEach((row) => {
    for (let a = 0; a < dim; a += 1) {
      for (let b = a; b < dim; b += 1) {
        result[a][b] += row[a] * row[b];
      }
    }
  });

  // Scale once and mirror into the lower triangle.
  for (let a = 0; a < dim; a += 1) {
    for (let b = a; b < dim; b += 1) {
      const scaled = result[a][b] * factor;
      result[a][b] = scaled;
      result[b][a] = scaled;
    }
  }
  return result;
}

/** Multiplies a matrix by a vector: one dot product per matrix row. */
function matVecMul(matrix: number[][], vector: FeatureVector): FeatureVector {
  const product: FeatureVector = [];
  for (const row of matrix) {
    product.push(dot(row, vector));
  }
  return product;
}

/** Returns a unit-length copy of `vector`; a zero vector is returned as an unscaled copy. */
function normalizeVector(vector: FeatureVector): FeatureVector {
  let squared = 0;
  vector.forEach((component) => {
    squared += component * component;
  });
  const length = Math.sqrt(squared);
  return length === 0 ? vector.slice() : vector.map((component) => component / length);
}

/**
 * Returns a copy of `covariance` with `eigenvalue * component * componentᵀ`
 * subtracted, removing the extracted component's contribution. The input is not mutated.
 */
function deflateCovariance(
  covariance: number[][],
  component: FeatureVector,
  eigenvalue: number,
): number[][] {
  const size = covariance.length;
  const result: number[][] = [];
  for (const row of covariance) {
    result.push(row.slice());
  }
  for (let r = 0; r < size; r += 1) {
    const scaled = eigenvalue * component[r];
    for (let c = 0; c < size; c += 1) {
      result[r][c] -= scaled * component[c];
    }
  }
  return result;
}

/** Dot product of `a` and `b`, iterating over the indices of `a`. */
function dot(a: FeatureVector, b: FeatureVector): number {
  let total = 0;
  for (const [position, value] of a.entries()) {
    total += value * b[position];
  }
  return total;
}

// Bundle the helper implementations behind the AnomalyUtils interface.
const utils: AnomalyUtils = {
  trainIsolationForest: trainIsolationForestImpl,
  createQboFeatureDataset: createQboFeatureDatasetImpl,
  projectTo2D: projectTo2DImpl,
};

// Publish the bundle on globalThis under `__anomalyUtils`.
const globalState = globalThis as typeof globalThis & { __anomalyUtils?: AnomalyUtils };
globalState.__anomalyUtils = utils;

// Short names used by the following notebook cells.
const { trainIsolationForest, createQboFeatureDataset, projectTo2D } = globalState.__anomalyUtils;

console.log('✅ QBO anomaly utilities ready.');


✅ QBO anomaly utilities ready.


## Isolation Forest on QBO Journals

Generate a ledger-like dataset, train the Isolation Forest, and review the
highest scoring journal entries alongside engineered feature values.


In [33]:

// Generate a QBO-style journal dataset with engineered features.
// `qboThreshold` is declared here because this cell assigns it below.
type NotebookState = typeof globalThis & {
  qboDataset?: QboFeatureDataset;
  qboThreshold?: number;
};
const state = globalThis as NotebookState;

const qboDataset = createQboFeatureDataset({
  entries: 720,
  anomalyRatio: 0.16,
  foreignCurrencyShare: 0.12,
  seed: 5931,
});

// Share the dataset and the score threshold with later cells via globalThis.
state.qboDataset = qboDataset;
state.qboThreshold = 0.62;

// Only a cell's final expression is rendered automatically, so rich output
// produced mid-cell has to be sent explicitly with Deno.jupyter.display().
await Deno.jupyter.display(
  Deno.jupyter.md`Generated **${qboDataset.entries.length}** journal entries with **${qboDataset.featureNames.length}** engineered features.`,
);

const featureItems = qboDataset.featureNames
  .map((name) => `<li><code>${name}</code></li>`)
  .join('');

await Deno.jupyter.display(
  Deno.jupyter.html`
  <details open>
    <summary>Feature catalog (${qboDataset.featureNames.length})</summary>
    <ul style="columns:2; gap:1.5rem; font-family:'JetBrains Mono','Fira Code',monospace; font-size:0.85rem;">
      ${featureItems}
    </ul>
  </details>
`,
);

const sample = qboDataset.entries[0];
console.log('Sample doc', sample.docNumber, sample.currency, sample.lines.length, 'lines');


Sample doc JE-2025-04200 USD 2 lines


In [34]:

// Train the Isolation Forest on QBO-style feature vectors.
// Results are stored on globalThis so the visualisation and table cells can read them.
type NotebookState = typeof globalThis & {
  qboDataset?: QboFeatureDataset;
  qboScores?: {
    rows: Array<{
      entry: QboJournalEntry;
      features: FeatureVector;
      label: boolean;
      score: number;
      path: number;
    }>;
    featureNames: string[];
  };
  qboProjection?: {
    points: Array<{
      x: number;
      y: number;
      score: number;
      label: boolean;
      entry: QboJournalEntry;
    }>;
    mean: FeatureVector;
    components: FeatureVector[];
  };
  qboThreshold?: number;
};

const state = globalThis as NotebookState;
if (!state.qboDataset) {
  throw new Error('Run the dataset cell first to populate qboDataset.');
}

const { featureMatrix, entries, labels, featureNames } = state.qboDataset;
const forest = trainIsolationForest(featureMatrix, {
  nTrees: 150,
  sampleSize: 220,
  seed: 90210,
});

// Scores in dataset order: index i here matches entries[i], labels[i] and the
// i-th projected coordinate.
const scoredInOrder = entries.map((entry, index) => ({
  entry,
  features: featureMatrix[index],
  label: labels[index],
  score: forest.score(featureMatrix[index]),
  path: forest.pathLength(featureMatrix[index]),
}));

// Rank a copy. Sorting the original array in place would break the positional
// link to `entries`/`labels` that the projection below relies on.
const scoredRows = [...scoredInOrder].sort((a, b) => b.score - a.score);
const threshold = state.qboThreshold ?? 0.62;
state.qboScores = { rows: scoredRows, featureNames };
state.qboThreshold = threshold;

const projection = projectTo2D(featureMatrix);
state.qboProjection = {
  points: projection.coordinates.map((coords, index) => ({
    x: coords[0],
    y: coords[1],
    // Must come from the unsorted list so the score belongs to this entry.
    score: scoredInOrder[index].score,
    label: labels[index],
    entry: entries[index],
  })),
  mean: projection.mean,
  components: projection.components,
};

// Share of ground-truth anomalies among the `cutoff` highest-scoring entries.
const cutoff = 25;
const precision = scoredRows
  .slice(0, cutoff)
  .filter((row) => row.label)
  .length / cutoff;

console.log('Isolation Forest training complete.');
await Deno.jupyter.md`Isolation Forest trained on **${featureMatrix.length}** journal entries with ${featureNames.length} features. Precision@${cutoff} = ${(precision * 100).toFixed(1)}%. Threshold = ${threshold}.`;


Isolation Forest training complete.


Isolation Forest trained on **720** journal entries with 16 features. Precision@25 = 100.0%. Threshold = 0.62.

In [36]:

// Visualize the PCA projection with D3.
// This part of the cell prepares the data that is embedded in the HTML output below.
type NotebookState = typeof globalThis & {
  qboProjection?: {
    points: Array<{
      x: number;
      y: number;
      score: number;
      label: boolean;
      entry: QboJournalEntry;
    }>;
  };
  qboThreshold?: number;
};

const state = globalThis as NotebookState;
if (!state.qboProjection || !state.qboProjection.points) {
  throw new Error('Train the Isolation Forest to populate the projection.');
}

const points = state.qboProjection.points;
const threshold = state.qboThreshold ?? 0.62;

// Keep only the fields the chart needs; coordinates are rounded to 4 decimals.
const dataset = points.map((point) => ({
  x: Number(point.x.toFixed(4)),
  y: Number(point.y.toFixed(4)),
  score: point.score,
  label: point.label,
  docNumber: point.entry.docNumber,
  date: point.entry.txnDate.slice(0, 10),
  currency: point.entry.currency,
}));

// The JSON is pasted into an inline <script>. Escape "<" so a string value such
// as "</script>" coming from real journal data cannot terminate the script block.
// "\u003c" is still valid JSON/JavaScript and decodes back to "<".
const payload = JSON.stringify(dataset).replace(/</g, '\\u003c');

// Render the scatter plot as HTML: a container, tooltip styles, and a module
// script that loads D3 from a CDN. `payload` and `threshold` are interpolated
// into the script as literals. Points are coloured by ground-truth label, grow
// with the amount by which their score exceeds the threshold, and get an outline
// when the score is at or above it.
await Deno.jupyter.html`
  <div id="qbo-scatter" style="margin-top: 16px"></div>
  <style>
    #qbo-scatter svg text {
      font-family: 'Inter', sans-serif;
      font-size: 12px;
    }
    .qbo-tooltip {
      position: absolute;
      background: rgba(15, 23, 42, 0.92);
      color: #f8fafc;
      padding: 8px 10px;
      border-radius: 8px;
      font-size: 12px;
      pointer-events: none;
      opacity: 0;
      transition: opacity 120ms ease-in-out;
      box-shadow: 0 10px 30px rgba(15, 23, 42, 0.35);
    }
  </style>
  <div class="qbo-tooltip" id="qbo-tooltip" role="status" aria-live="polite"></div>
  <script type="module">
    import * as d3 from "https://cdn.jsdelivr.net/npm/d3@7/+esm";

    const data = ${payload};
    const threshold = ${threshold};

    const container = document.getElementById('qbo-scatter');
    container.innerHTML = '';

    const width = 960;
    const height = 420;
    const margin = { top: 32, right: 24, bottom: 48, left: 56 };

    const svg = d3
      .select(container)
      .append('svg')
      .attr('viewBox', '0 0 ' + width + ' ' + height)
      .attr('width', '100%')
      .attr('height', height);

    const xExtent = d3.extent(data, (d) => d.x);
    const yExtent = d3.extent(data, (d) => d.y);

    const xScale = d3
      .scaleLinear()
      .domain(xExtent)
      .nice()
      .range([margin.left, width - margin.right]);

    const yScale = d3
      .scaleLinear()
      .domain(yExtent)
      .nice()
      .range([height - margin.bottom, margin.top]);

    const tooltip = document.getElementById('qbo-tooltip');

    const color = (d) => (d.label ? '#dc2626' : '#818cf8');
    const radius = (d) => 4 + Math.max(0, d.score - threshold) * 12;

    svg
      .append('g')
      .attr('transform', 'translate(0,' + (height - margin.bottom) + ')')
      .call(d3.axisBottom(xScale))
      .call((g) =>
        g
          .append('text')
          .attr('x', width - margin.right)
          .attr('y', 36)
          .attr('fill', '#cbd5f5')
          .attr('text-anchor', 'end')
          .text('PC1'),
      );

    svg
      .append('g')
      .attr('transform', 'translate(' + margin.left + ',0)')
      .call(d3.axisLeft(yScale))
      .call((g) =>
        g
          .append('text')
          .attr('x', -margin.left + 12)
          .attr('y', margin.top - 12)
          .attr('fill', '#cbd5f5')
          .attr('text-anchor', 'start')
          .text('PC2'),
      );

    svg
      .append('g')
      .selectAll('circle')
      .data(data)
      .join('circle')
      .attr('cx', (d) => xScale(d.x))
      .attr('cy', (d) => yScale(d.y))
      .attr('r', (d) => radius(d))
      .attr('fill', (d) => color(d))
      .attr('fill-opacity', 0.78)
      .attr('stroke', (d) => (d.score >= threshold ? '#f8fafc' : 'transparent'))
      .attr('stroke-width', 1.2)
      .on('mouseenter', (event, d) => {
        tooltip.style.opacity = '1';
        tooltip.textContent =
          d.docNumber + ' · ' + d.date + ' · score ' + d.score.toFixed(3) + ' · anomaly: ' + (d.label ? 'yes' : 'no');
      })
      .on('mousemove', (event) => {
        tooltip.style.left = event.pageX + 12 + 'px';
        tooltip.style.top = event.pageY - 28 + 'px';
      })
      .on('mouseleave', () => {
        tooltip.style.opacity = '0';
      });

    const legend = svg
      .append('g')
      .attr('transform', 'translate(' + margin.left + ',' + (margin.top - 12) + ')');

    const legendData = [
      { label: 'Ground-truth anomaly', color: '#dc2626' },
      { label: 'Normal entry', color: '#818cf8' },
    ];

    const legendGroup = legend
      .selectAll('g')
      .data(legendData)
      .join('g')
      .attr('transform', (_, i) => 'translate(' + i * 180 + ',0)');

    legendGroup
      .append('circle')
      .attr('r', 6)
      .attr('cx', 0)
      .attr('cy', 0)
      .attr('fill', (d) => d.color);

    legendGroup
      .append('text')
      .attr('x', 12)
      .attr('y', 4)
      .attr('fill', '#f8fafc')
      .text((d) => d.label);
  </script>
`;


In [37]:

// Display the highest scoring journal entries and key features.
// This part of the cell builds the <tr> markup for the 12 top-ranked rows.
type NotebookState = typeof globalThis & {
  qboScores?: {
    rows: Array<{
      entry: QboJournalEntry;
      features: FeatureVector;
      label: boolean;
      score: number;
      path: number;
    }>;
    featureNames: string[];
  };
  qboThreshold?: number;
};

const state = globalThis as NotebookState;
if (!state.qboScores) {
  throw new Error('Train the Isolation Forest before rendering results.');
}

const { rows, featureNames } = state.qboScores;
const topRows = rows.slice(0, 12);
// Same fallback as the dataset, training and scatter cells (was 0.65 here only).
const threshold = state.qboThreshold ?? 0.62;

// Look features up by name so the table does not depend on column positions.
const featureIndex = new Map(featureNames.map((name, idx) => [name, idx]));
const extract = (vector: FeatureVector, name: string) => {
  const index = featureIndex.get(name);
  if (index === undefined) return 0;
  return vector[index];
};

// Entry strings come from the journal data; escape them before placing them in markup.
const escapeHtml = (text: string) =>
  text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');

const tableRows = topRows
  .map((row) => {
    const featureMap = {
      lineCount: extract(row.features, 'line_count'),
      totalDebit: extract(row.features, 'total_debit'),
      imbalance: extract(row.features, 'imbalance'),
      arRatio: extract(row.features, 'ar_line_ratio'),
      apRatio: extract(row.features, 'ap_line_ratio'),
      classCoverage: extract(row.features, 'class_coverage'),
      memoLength: extract(row.features, 'memo_length'),
      fxFlag: extract(row.features, 'foreign_currency_flag'),
    };

    const predicted = row.score >= threshold;

    return `
      <tr>
        <td>${escapeHtml(row.entry.docNumber)}</td>
        <td>${escapeHtml(row.entry.txnDate.slice(0, 10))}</td>
        <td>${escapeHtml(row.entry.currency)}</td>
        <td>${row.score.toFixed(3)}</td>
        <td>${predicted ? 'yes' : 'no'}</td>
        <td>${row.label ? 'yes' : 'no'}</td>
        <td>${featureMap.lineCount.toFixed(0)}</td>
        <td>${featureMap.totalDebit.toFixed(0)}</td>
        <td>${featureMap.imbalance.toFixed(1)}</td>
        <td>${featureMap.arRatio.toFixed(2)}</td>
        <td>${featureMap.apRatio.toFixed(2)}</td>
        <td>${featureMap.classCoverage.toFixed(2)}</td>
        <td>${featureMap.memoLength.toFixed(0)}</td>
        <td>${featureMap.fxFlag >= 0.5 ? 'yes' : 'no'}</td>
      </tr>
    `;
  })
  .join('');

// Render the top-anomaly table: scoped styles, a fixed header row, and the
// pre-built `tableRows` markup as the body. The column order of the header must
// match the <td> order produced when `tableRows` was built.
await Deno.jupyter.html`
  <style>
    table.qbo-top-anomalies {
      border-collapse: collapse;
      width: 100%;
      max-width: 1100px;
      margin-top: 12px;
    }
    table.qbo-top-anomalies th,
    table.qbo-top-anomalies td {
      border: 1px solid #d1d5db;
      padding: 8px 10px;
      text-align: center;
      font-family: "JetBrains Mono", "Fira Code", monospace;
    }
    table.qbo-top-anomalies thead {
      background: #f1f5f9;
      color: #0f172a;
      text-transform: uppercase;
      letter-spacing: 0.04em;
      font-size: 0.75rem;
    }
  </style>
  <table class="qbo-top-anomalies" aria-label="Isolation Forest top QBO anomalies">
    <thead>
      <tr>
        <th scope="col">Doc #</th>
        <th scope="col">Date</th>
        <th scope="col">Currency</th>
        <th scope="col">Score</th>
        <th scope="col">Predicted</th>
        <th scope="col">Ground truth</th>
        <th scope="col">Line count</th>
        <th scope="col">Total debit</th>
        <th scope="col">Imbalance</th>
        <th scope="col">AR ratio</th>
        <th scope="col">AP ratio</th>
        <th scope="col">Class coverage</th>
        <th scope="col">Memo length</th>
        <th scope="col">FX entry</th>
      </tr>
    </thead>
    <tbody>
      ${tableRows}
    </tbody>
  </table>
`;


Doc #,Date,Currency,Score,Predicted,Ground truth,Line count,Total debit,Imbalance,AR ratio,AP ratio,Class coverage,Memo length,FX entry
JE-2025-04406,2025-03-15,USD,0.695,yes,yes,4,2710,0.0,0.0,0.25,0.75,43,no
JE-2025-04722,2025-07-15,GBP,0.694,yes,yes,3,3675,0.0,0.0,0.0,0.0,43,yes
JE-2025-04455,2025-04-22,USD,0.693,yes,yes,4,2429,0.0,0.0,0.25,0.75,43,no
JE-2025-04835,2025-12-22,USD,0.679,yes,yes,4,2412,0.0,0.0,0.0,0.0,43,no
JE-2025-04852,2025-05-01,USD,0.675,yes,yes,4,2121,0.0,0.25,0.0,1.0,43,no
JE-2025-04754,2025-03-15,GBP,0.669,yes,yes,3,2422,0.0,0.33,0.0,1.0,43,yes
JE-2025-04534,2025-11-15,USD,0.668,yes,yes,4,2626,0.0,0.0,0.0,0.75,43,no
JE-2025-04810,2025-11-15,USD,0.661,yes,yes,4,2904,0.0,0.0,0.0,1.0,43,no
JE-2025-04451,2025-12-22,USD,0.657,yes,yes,4,1814,0.0,0.0,0.0,0.75,43,no
JE-2025-04560,2025-01-01,USD,0.656,yes,yes,3,2815,0.0,0.0,0.0,1.0,43,no


## Next steps

- Replace the synthetic generator with actual QBO journal exports and reuse the feature pipeline.
- Tune Isolation Forest hyperparameters (`nTrees`, `sampleSize`, `heightLimit`) to hit your false-positive targets.
- Export the anomaly table to CSV for finance reviews or pipe the scores into downstream alerting.
- Add explainability by plotting per-feature contributions (e.g., SHAP) for top anomalies.
