In [1]:
import json
import random
from pathlib import Path
from typing import Dict, List, Tuple
from collections import Counter
from pprint import pprint

In [2]:
# --- Paths ---
# Inputs: both files are read with load_json in the loading cell
# (as baseline_outputs and raw_data respectively).
BASELINE_PATH = Path("baseline_regex_outputs.json")
RAW_DATA_PATH = Path("primary_data.json")

# Outputs: the persisted list of sampled gold filenames, the directory holding
# one "<filename>.gold.json" per sampled filing, and the directory receiving
# the plain-text section exports.
GOLD_LIST_PATH = Path("gold_annotation_files.json")
GOLD_DIR = Path("gold_annotations")
RAW_TEXT_DIR = Path("gold_raw_text")

# --- Settings ---
SEED = 42  # seed passed to get_or_create_gold_list for the random sample
GOLD_N = 8  # number of filings sampled into the gold set
ENTITY_LABELS = ["PERSON", "TITLE", "ORG", "MONEY"]  # the four labels used by the annotation cells
SCHEMA_VERSION = "1.0"  # written into every gold record as "schema_version"


In [3]:
def load_json(path: Path):
    """Read the UTF-8 encoded file at ``path`` and return its parsed JSON value."""
    raw = path.read_text(encoding="utf-8")
    return json.loads(raw)

def save_json(path: Path, obj, *, indent: int = 2):
    """Serialize ``obj`` to JSON and write it to ``path`` as UTF-8.

    ``indent`` is forwarded to ``json.dumps``. Any existing file at ``path``
    is overwritten.
    """
    serialized = json.dumps(obj, indent=indent)
    path.write_text(serialized, encoding="utf-8")

def ensure_dir(path: Path):
    """Create directory ``path`` (and any missing parents); no-op if it already exists."""
    path.mkdir(exist_ok=True, parents=True)

In [4]:
# Load both inputs first, then derive the candidate pool from the baseline keys.
baseline_outputs = load_json(BASELINE_PATH)
raw_data = load_json(RAW_DATA_PATH)
workable_files = list(baseline_outputs)  # iterating a dict yields its keys

# Report the sizes in the same order as before.
print("Baseline filings:", len(baseline_outputs))
print("Raw filings:", len(raw_data))
print("Workable filings:", len(workable_files))


Baseline filings: 38
Raw filings: 191
Workable filings: 38


In [5]:
def get_or_create_gold_list(workable_files: List[str], *, n: int, seed: int, path: Path) -> List[str]:
    """Return the list of filenames selected for gold annotation.

    If ``path`` already exists, the list stored there is loaded and returned
    unchanged; ``n`` and ``seed`` are not applied in that case. A warning is
    printed when the stored list no longer matches ``n`` or contains names
    that are not in ``workable_files``, so a stale file does not go unnoticed.

    Otherwise ``n`` filenames are drawn from ``workable_files`` with a
    ``random.Random(seed)`` generator, saved to ``path`` and returned.

    Raises:
        ValueError: if a new sample is needed and ``n`` is negative or larger
            than ``len(workable_files)``.
    """
    if path.exists():
        gold_files = load_json(path)
        print("Loaded gold file list:", len(gold_files))

        # The stored list wins, but flag drift from the current settings.
        if len(gold_files) != n:
            print(f"WARNING: stored gold list has {len(gold_files)} files but n={n}; delete {path} to resample")
        workable = set(workable_files)
        unknown = [fn for fn in gold_files if fn not in workable]
        if unknown:
            print(f"WARNING: {len(unknown)} stored gold files are not in workable_files: {unknown}")
        return gold_files

    # Fail before touching the disk, with a message that names the actual sizes.
    if not 0 <= n <= len(workable_files):
        raise ValueError(f"Cannot sample n={n} gold files from {len(workable_files)} workable files")

    rng = random.Random(seed)
    gold_files = rng.sample(workable_files, n)
    save_json(path, gold_files)
    print("Saved gold annotation file list:", len(gold_files))
    return gold_files

# Reuse the persisted sample when it exists; otherwise draw and save a new one.
gold_files = get_or_create_gold_list(
    workable_files,
    n=GOLD_N,
    seed=SEED,
    path=GOLD_LIST_PATH,
)
gold_files


Saved gold annotation file list: 8


['1066684_10K_2020_0001104659-21-042359.json',
 '1017655_10K_2020_0001654954-21-003649.json',
 '1378590_10K_2021_0001437749-21-028984.json',
 '1353499_10K_2020_0001344676-21-000004.json',
 '1327567_10K_2021_0001327567-21-000029.json',
 '1082324_10K_2020_0001140361-21-008678.json',
 '1064722_10K_2020_0001760319-21-000039.json',
 '1404655_10K_2020_0001564590-21-006083.json']

In [6]:
def gold_path(fn: str) -> Path:
    """Return the gold annotation file path for ``fn``: ``GOLD_DIR/<fn>.gold.json``."""
    return GOLD_DIR.joinpath(f"{fn}.gold.json")

def load_gold(fn: str) -> dict:
    """Load and return the gold record stored at ``gold_path(fn)``."""
    location = gold_path(fn)
    return load_json(location)

def save_gold(fn: str, obj: dict):
    """Write the gold record ``obj`` to ``gold_path(fn)``."""
    location = gold_path(fn)
    save_json(location, obj)

def init_gold_record(filename: str, record: dict) -> dict:
    """Build an empty gold record for ``filename``.

    ``company``, ``filing_date`` and ``period_of_report`` are copied from
    ``record`` (``None`` when absent). ``annotations`` starts as an empty list
    whose items are dicts of the form {label, section, start, end, text}.
    """
    template = {"filename": filename}
    for field in ("company", "filing_date", "period_of_report"):
        template[field] = record.get(field)
    template["notes"] = ""
    template["annotations"] = []
    template["schema_version"] = SCHEMA_VERSION
    return template

def ensure_gold_templates(gold_files: List[str], raw_data: Dict[str, dict], gold_dir: Path):
    """Make sure every file in ``gold_files`` has a gold template in ``gold_dir``.

    Files that already exist are left untouched. For each missing one, a fresh
    record from ``init_gold_record`` is written to ``<gold_dir>/<fn>.gold.json``.
    A one-line summary with created/existing counts is printed at the end.
    """
    ensure_dir(gold_dir)
    created = 0
    for fn in gold_files:
        target = gold_dir / f"{fn}.gold.json"
        if not target.exists():
            template = init_gold_record(fn, raw_data[fn])
            save_json(target, template)
            created += 1
    print(f"Gold templates ensured in: {gold_dir} (created {created}, existing {len(gold_files)-created})")

def export_raw_text_files(gold_files: List[str], raw_data: Dict[str, dict], raw_text_dir: Path, sections=("item_10", "item_7")):
    """Write each requested section of each gold filing to its own text file.

    Output files are named ``<raw_text_dir>/<fn>.<section>.txt`` and encoded as
    UTF-8. A section that is missing or falsy in the record yields an empty file.
    """
    ensure_dir(raw_text_dir)
    for fn in gold_files:
        rec = raw_data[fn]
        for section in sections:
            body = rec.get(section, "") or ""
            target = raw_text_dir / f"{fn}.{section}.txt"
            target.write_text(body, encoding="utf-8")
    print(f"Exported raw text files to: {raw_text_dir}")

def reset_gold(fn: str):
    """Delete the gold file of ``fn`` (if present) and recreate it as an empty template."""
    stale = gold_path(fn)
    stale.unlink(missing_ok=True)
    ensure_gold_templates([fn], raw_data, GOLD_DIR)

def show_gold(fn: str, last_n: int = 20):
    """Print a short summary of the gold record of ``fn``.

    Prints the filename, the company name taken from ``raw_data`` ("UNKNOWN"
    when unavailable), the total number of annotations, and then the last
    ``last_n`` annotations, one per line.

    ``last_n <= 0`` prints no annotations.
    """
    gold = load_gold(fn)
    company = raw_data.get(fn, {}).get("company", "UNKNOWN")
    annotations = gold.get("annotations", [])

    print("FILE:", fn)
    print("Company:", company)
    print("Total annotations:", len(annotations))

    # Guard the slice: annotations[-0:] is the whole list, and a negative
    # last_n would turn into a positive start index.
    tail = annotations[-last_n:] if last_n > 0 else []
    for a in tail:
        print(a)


# Create any missing gold templates, then export the section texts for the same files.
ensure_gold_templates(gold_files=gold_files, raw_data=raw_data, gold_dir=GOLD_DIR)
export_raw_text_files(gold_files=gold_files, raw_data=raw_data, raw_text_dir=RAW_TEXT_DIR)


Gold templates ensured in: gold_annotations (created 8, existing 0)
Exported raw text files to: gold_raw_text


In [7]:
def find_spans(text: str, needle: str) -> List[Tuple[int, int]]:
    """Return the ``(start, end)`` offsets of every occurrence of ``needle`` in ``text``.

    Matching is exact and case-sensitive. The search resumes one character
    after each hit, so overlapping occurrences are all reported. An empty
    ``needle`` yields an empty list.
    """
    spans: List[Tuple[int, int]] = []
    if needle:
        width = len(needle)
        pos = text.find(needle)
        while pos != -1:
            spans.append((pos, pos + width))
            pos = text.find(needle, pos + 1)
    return spans



def add_annotation(fn: str, section: str, label: str, exact_text: str, occurrence: int = 0):
    """Append one span annotation to the gold record of ``fn`` and save it.

    The span is located by searching ``raw_data[fn][section]`` for
    ``exact_text`` and taking the match at index ``occurrence`` (0-based).
    Matches are plain substring hits from ``find_spans``, so hits inside a
    longer word or phrase count toward the occurrence index.

    Returns:
        ``(start, end, n_matches)``: the character offsets of the chosen match
        and the total number of matches found.

    Raises:
        KeyError: if ``fn`` is not in ``raw_data``.
        ValueError: if ``exact_text`` does not occur in the section.
        IndexError: if ``occurrence`` is negative or not smaller than the
            number of matches.
    """
    rec = raw_data[fn]
    source = rec.get(section, "") or ""
    spans = find_spans(source, exact_text)

    if not spans:
        raise ValueError(f"Text not found in {section}: {exact_text!r}")

    # A negative index would otherwise wrap around and silently pick a match
    # counted from the end of the section.
    if occurrence < 0:
        raise IndexError(f"occurrence must be >= 0, got {occurrence} for {exact_text!r}")

    if occurrence >= len(spans):
        raise IndexError(f"occurrence={occurrence} but only {len(spans)} matches found for {exact_text!r}")

    start, end = spans[occurrence]
    gold = load_gold(fn)

    gold["annotations"].append({
        "label": label,
        "section": section,
        "start": start,
        "end": end,
        "text": source[start:end],
    })

    save_gold(fn, gold)
    return start, end, len(spans)

def dedupe_annotations(fn: str) -> int:
    """Drop repeated annotations from the gold record of ``fn`` and save it.

    Two annotations are duplicates when their (label, section, start, end)
    match; the first one encountered is kept and the original order is
    preserved. Returns the number of annotations removed.
    """
    gold = load_gold(fn)
    original = gold["annotations"]

    # dicts keep insertion order, and setdefault never overwrites an existing
    # key, so this retains the first annotation seen for each key.
    unique = {}
    for ann in original:
        unique.setdefault((ann["label"], ann["section"], ann["start"], ann["end"]), ann)

    removed = len(original) - len(unique)
    gold["annotations"] = list(unique.values())
    save_gold(fn, gold)

    return removed



In [8]:
def add_many(fn, section, label, values):
    """Annotate every string in ``values`` with ``label`` in ``section`` of ``fn``.

    Values are processed in list order. The k-th time the same string appears
    in ``values`` it is passed to ``add_annotation`` with ``occurrence=k``
    (0-based), so repeating a string selects its successive matches in the text.
    """
    times_seen = Counter()
    for value in values:
        add_annotation(fn, section, label, exact_text=value, occurrence=times_seen[value])
        # Count only after the call succeeded, as before.
        times_seen[value] += 1


LABELING: '1327567_10K_2021_0001327567-21-000029.json', '1404655_10K_2020_0001564590-21-006083.json'

TO LABEL: 


COMPLETED:
- 1066684_10K_2020_0001104659-21-042359.json
- 1017655_10K_2020_0001654954-21-003649.json
- 1378590_10K_2021_0001437749-21-028984.json
- 1353499_10K_2020_0001344676-21-000004.json
- 1082324_10K_2020_0001140361-21-008678.json
- 1064722_10K_2020_0001760319-21-000039.json
- 
- 


In [9]:
# Hand-entered gold values for one filing. Each list is later passed to
# add_many, which consumes it in order: the k-th repetition of a string is
# mapped to the k-th match of that exact string in the section text, so both
# the spelling and the number of repetitions matter.
fn = "1066684_10K_2020_0001104659-21-042359.json"

# MONEY (Item 7)
# "$46,000" is listed twice on purpose: two separate occurrences are annotated.
money_values = [
    "$0", "$118,000", "$178,000", "$46,000", "$34,000", "$164,000", "$211,000",
    "$150,000", "$350,000", "$465,000", "$554,100", "$600,000", "$637,500",
    "$707,000", "$24,000", "$2,000", "$689,000", "$7,624", "$86,961",
    "$125,000", "$173,000", "$46,000", "$254,000",
]

# PERSON (Item 10)
# Five entries of the same name -> occurrences 0..4 of that name.
person_values = ["Frederick Jones"] * 5

# TITLE (Item 10)
# The "\n" is part of the search string; presumably the source text breaks the
# line inside this title -- verify against the exported item_10 text.
title_values = [
    "President",
    "Chief Executive Officer",
    "Chief\nFinancial Officer",
    "Director",
    "Chief Executive Officer",
    "CEO",
]

# ORG (Item 10)
org_values = [
    "Delfin",
    "Fairwood Peninsula Energy Corporation",
    "Delfin Midstream, Inc.",
    "Energy Global Services FZE",
    "Asiatic Gas Energy Holdings Limited",
    "Marc Rich + Co A.G., Switzerland",
    "Glencore Plc",
]

In [10]:
# Start from an empty template so re-running this cell does not stack annotations.
reset_gold(fn)

# (section, label, values) triples, applied in the same order as before.
for section, label, values in (
    ("item_7", "MONEY", money_values),
    ("item_10", "PERSON", person_values),
    ("item_10", "TITLE", title_values),
    ("item_10", "ORG", org_values),
):
    add_many(fn, section, label, values)

removed = dedupe_annotations(fn)
print("Deduped:", removed)

show_gold(fn)


Gold templates ensured in: gold_annotations (created 1, existing 0)
Deduped: 0
FILE: 1066684_10K_2020_0001104659-21-042359.json
Company: THEGLOBE COM INC
Total annotations: 41
{'label': 'MONEY', 'section': 'item_7', 'start': 5744, 'end': 5751, 'text': '$46,000'}
{'label': 'MONEY', 'section': 'item_7', 'start': 5801, 'end': 5809, 'text': '$254,000'}
{'label': 'PERSON', 'section': 'item_10', 'start': 256, 'end': 271, 'text': 'Frederick Jones'}
{'label': 'PERSON', 'section': 'item_10', 'start': 1037, 'end': 1052, 'text': 'Frederick Jones'}
{'label': 'PERSON', 'section': 'item_10', 'start': 3552, 'end': 3567, 'text': 'Frederick Jones'}
{'label': 'PERSON', 'section': 'item_10', 'start': 7976, 'end': 7991, 'text': 'Frederick Jones'}
{'label': 'PERSON', 'section': 'item_10', 'start': 8278, 'end': 8293, 'text': 'Frederick Jones'}
{'label': 'TITLE', 'section': 'item_10', 'start': 272, 'end': 281, 'text': 'President'}
{'label': 'TITLE', 'section': 'item_10', 'start': 283, 'end': 306, 'text': 'Ch

In [11]:
fn = "1017655_10K_2020_0001654954-21-003649.json"

# MONEY (Item 7)
money_values = [
    '$3,541',
    '$19,395',
    '27,845',
    '148,035',
    '114,881',
    '193,150',
    '425,839',
    '2,011',
    '12,348,683',
    '10,185,704',
    '$12,920,789',
    '$10,548,295',
    '$15,854',
    '$3,541',
    '$19,395',
    '$120,190',
    '$27,845',
    '$148,035',
    '$78,269',
    '$114,881',
    '$193,150',
    '$423,828',
    '$425,839',
    '$2,011',
    '$2,162,979',
    '$12,348,683',
    '$10,185,704',
    '$364,548',
    '$3,111,289',
    '$2,746,741',
    '$5,242,763',
    '$3,458,774',
    '$1,783,989',
    '$21,128',
    '$991,840',
    '$970,712',
    '$(2,232,553)',
    '$282,011',
    '$(0.41)',
    '$0.06',
    '$(2,232,553)',
    '$282,011',
    '20,125',
    '488,745',
    '490,250',
    '28,545',
    '22,850',
    '2,452,701',
    '407,974',
    '(880,553)',
    '(120,835)',
    '(73,208)',
    '(739)',
    '(3,688)',
    '464,820',
    '(230,103)',
    '$1,100,809',
    '$15,533',
    '$1,644,210',
    '$475,881',
    '$218,615',
    '$397,891',
    '$616,506'
]

# PERSON (Item 10)
person_values = [
    'W. Austin Lewis, IV',
    'David Scott',
    'Andrew Pilaro',
    'Laurie Bradley',
    'David Ogden',
    'Andrew Pilaro',
    'W. Austin Lewis, IV',
    'Allan Pratt',
    'Andrew Pilaro',
    'W. Austin Lewis, IV',
    'David Ogden',
    'Laurie Bradley',
    'David Scott',
    'Andrew Pilaro',
    'Andrew Pilaro',
    'W. Austin Lewis'
] 

# TITLE (Item 10)
title_values = [
    'Interim CEO',
    'CFO',
    'COO',
    'Director',
    'Director',
    'Director',
    'Director',
    'President',
    'CFO',
    'interim CEO',
    'Chairman of the Audit Committee',
    'Chief Executive Officer',
    'General Partner',
    'interim CEO',
    'CEO',
    'Managing Director of Sales',
    'Managing Director',
    'Chief Executive Officer',
    'President',
    'COO',
    'Director of Technology',
    'CFO'
]

# ORG (Item 10)
org_values = [
   'PAID',
   'PAID',
   'CAP Properties Limited',
   'PAID',
   'MAM Software, Inc.',
   'Lewis Asset Management Corporation',
   'Lewis Opportunity Fund',
   'Lewis Asset Management',
   'Puglisi & Co.',
   'Thompson Davis & Co.',
   'Branch Cabell & Company',
   'Soho Management Consulting',
   'FedEx',
   'FedEx Middle East and Africa',
   'FedEx',
   'FedEx Logistics',
   'FedEx Logistics',
   'FedEx',
   'Flexible Support Group',
   'ASG Renaissance',
   'PAID',
   'KMJ Corbin & Company LLP',
   'KMJ Corbin & Company LLP',
   'PAID'
]

In [12]:
# Start from an empty template so re-running this cell does not stack annotations.
reset_gold(fn)

# (section, label, values) triples, applied in the same order as before.
for section, label, values in (
    ("item_7", "MONEY", money_values),
    ("item_10", "PERSON", person_values),
    ("item_10", "TITLE", title_values),
    ("item_10", "ORG", org_values),
):
    add_many(fn, section, label, values)

removed = dedupe_annotations(fn)
print("Deduped:", removed)

show_gold(fn)

Gold templates ensured in: gold_annotations (created 1, existing 0)
Deduped: 0
FILE: 1017655_10K_2020_0001654954-21-003649.json
Company: PAID INC
Total annotations: 125
{'label': 'ORG', 'section': 'item_10', 'start': 2052, 'end': 2070, 'text': 'MAM Software, Inc.'}
{'label': 'ORG', 'section': 'item_10', 'start': 2142, 'end': 2176, 'text': 'Lewis Asset Management Corporation'}
{'label': 'ORG', 'section': 'item_10', 'start': 2267, 'end': 2289, 'text': 'Lewis Opportunity Fund'}
{'label': 'ORG', 'section': 'item_10', 'start': 2142, 'end': 2164, 'text': 'Lewis Asset Management'}
{'label': 'ORG', 'section': 'item_10', 'start': 2404, 'end': 2417, 'text': 'Puglisi & Co.'}
{'label': 'ORG', 'section': 'item_10', 'start': 2419, 'end': 2439, 'text': 'Thompson Davis & Co.'}
{'label': 'ORG', 'section': 'item_10', 'start': 2445, 'end': 2468, 'text': 'Branch Cabell & Company'}
{'label': 'ORG', 'section': 'item_10', 'start': 2845, 'end': 2871, 'text': 'Soho Management Consulting'}
{'label': 'ORG', 'sec

In [13]:
fn = "1378590_10K_2021_0001437749-21-028984.json"

# MONEY (Item 7)
money_values = [
    '$2.4 million',
    '$285 thousand',
    '$376 thousand',
    '$352 thousand',
    '$1.3 million',
    '€ 600 thousand',
    '$723 thousand',
    '$0.001',
    '$3.38',
    '$99 thousand',
    '$9.9 million',
    '$4.8 million',
    '$930 thousand',
    '$2.0 million',
    '$2.2 million',
    '$13.3 million',
    '$10.9 million',
    '$1.2 million',
    '$6.7 million',
    '$88 thousand',
    '$5.9 million',
    '$1.2 million',
    '$326 thousand',
    '$960 thousand',
    '$1.0 million',
    '($1.47)',
    '($0.59)',
    '$ 3,296',
    '$ 3,409',
    '$ (113 )',
    '9,963',
    '7,498',
    '2,465',
    '13,259',
    '10,907',
    '2,352',
    '1,743',
    '1,831',
    '(88 )',
    '2,790',
    '2,676',
    '4,533',
    '4,507',
    '8,726',
    '6,400',
    '2,326',
    '2,726',
    '2,614',
    '2,359',
    '2,455',
    '(96 )',
    '2,387',
    '1,641',
    '1,202',
    '1,235',
    '9,909',
    '8,044',
    '1,865',
    '(1,183 )',
    '(1,644 )',
    '(883 )',
    '(7 )',
    '(876 )',
    '(872 )',
    '(5,885 )',
    '1,028',
    '(6,913 )',
    '(7,863 )',
    '(8,200 )',
    '(1,174 )',
    '(1,185 )',
    '$ (6,689 )',
    '$ (7,015 )',
    '$ 1,839',
    '$ (116 )',
    '$ 1,955',
    '$113 thousand',
    '$3.3 million',
    '$3.4 million',
    '$325 thousand',
    '$2.5 million',
    '$10.0 million',
    '$7.5 million',
    '$2.6 million',
    '$26 thousand',
    '$4.5 million',
    '$4.5 million',
    '$88 thousand',
    '$1.7 million',
    '$1.8 million',
    '$114 thousand',
    '$2.8 million',
    '$2.7 million',
    '$2.3 million',
    '$8.7 million',
    '$6.4 million',
    '$112 thousand',
    '$2.7 million',
    '$2.6 million',
    '$96 thousand',
    '$2.4 million',
    '$2.5 million',
    '$746 thousand',
    '$2.4 million',
    '$1.6 million',
    '$234 thousand',
    '$1.2 million',
    '$968 thousand',
    '$1.2 million',
    '$366 thousand',
    '$1.2 million',
    '$1.6 million',
    '$461 thousand',
    '$5.9 million',
    '$1.0 million',
    '$88 thousand',
    '$960 thousand',
    '$88 thousand',
    '$883 thousand',
    '$7 thousand',
    '($1.2) million',
    '$11 thousand',
    '$32 million',
    '$30 million',
    '$1.1 million',
    '$1.1 million',
    '$ (6,689 )',
    '(1,174 )',
    '(88 )',
    '(960 )',
    '5,885',
    '(1,028 )',
    '1,130',
    '1,235',
    '$ 1,442',
    '$ (116 )',
    '$989 thousand',
    '$498 thousand',
    '$4.5 million',
    '$13.5 million',
    '$1.0 million',
    '$14.3 million',
    '$5.8 million',
    '$4.00',
    '$0.001',
    '$2.28',
    '$1,000',
    '$2.51',
    '$4.6 million',
    '$0.001',
    '$3.10',
    '$2.5 million',
    '$2.0 million',
    '$2.2 million',
    '$2.1 million',
    '$732 thousand',
    '$352 thousand',
    '$376 thousand',
    '$1.3 million',
    '$1,047,500',
    '$88 thousand',
    '$550 thousand',
    '$236 thousand',
    '$185 thousand',
    '$101 thousand',
    '$1.9 million',
    '$732 thousand',
    '$3.4 million',
    '$1.2 million',
    '$2.4 million',
    '$2.2 million'
]

# PERSON (Item 10)
person_values = [
    'Joni Kahn',
    'Kenneth Galaznik',
    'Scott Landers',
    'Michael Taglich',
    'Roger Kahn',
    'Thomas R. Windhausen',
    'Mark G. Downey',
    'Joni Kahn',
    'Kenneth Galaznik',
    'Scott Landers',
    'Michael Taglich',
    'Robert Taglich',
    'Michael Taglich',
    'Roger Kahn',
    'Thomas Windhausen'
] 

# TITLE (Item 10)
title_values = [
    'Chairperson',
    'Director',
    'Director',
    'Director',
    'Director',
    'President',
    'Chief Executive Officer',
    'Chief Financial Officer',
    'Member of the Audit Committee',
    'Member of the Compensation Committee',
    'Member of the Nominating and Governance Committee',
    'Independent director',
    'Chief Financial Officer',
    'Treasurer',
    'Chief Financial Officer',
    'Chairperson of the Board of Directors',
    'Chair of the Compensation Committee',
    'Senior Vice President of Global Services',
    'Vice President of Services',
    'Executive Vice President',
    'Senior Group Vice President',
    'Member of the Board of Directors',
    'Executive Vice President',
    'Partner',
    'Chairperson',
    "Chairman of the Company’s Audit Committee",
    'Senior Vice President',
    'Chief Financial Officer',
    'Treasurer',
    'Vice President of Finance',
    'Vice President of Finance',
    'Chair of the Nominating and Corporate Governance Committee',
    'President',
    'Chief Executive Officer',
    'Chief Operating Officer',
    'Chief Financial Officer',
    'Vice President of Global Finance',
    'Vice President of Finance and Administration',
    'Business Assurance Manager',
    'Chairman',
    'President',
    'Chairman of the Board',
    'Chief Operating Officer',
    'President',
    'Chief Executive Officer',
    'General Manager',
    'Chief Technology Officer',
    'Chief Financial Officer',
    'Treasurer',
    'VP of Finance',
    'VP of Finance',
    'President',
    'Chief Executive Officer',
    'Chair',
    'Chairman of the Audit Committee',
    'Audit Committee Financial Expert',
    'Chair',
    'Chair'
]

# ORG (Item 10)
org_values = [
    'Big Machines, Inc.',
    'Oracle',
    'HP',
    'BearingPoint',
    'Business Objects',
    'Business Objects',
    'SAP',
    'MapInfo',
    'MapInfo',
    'MapInfo',
    'Pitney Bowes',
    'KPMG Consulting',
    'American Science and Engineering, Inc.',
    'American Science and Engineering, Inc.',
    'Spectro Analytical Instruments, Inc.',
    'Monotype Imaging Holdings, Inc.',
    'Pitney Bowes Software',
    'Pitney Bowes',
    'MapInfo',
    'Pitney Bowes',
    'Coopers & Lybrand',
    'Taglich Brothers, Inc.',
    'Taglich Brothers, Inc.',
    'Air Industries Group Inc.',
    'Mare Island Dry Dock Inc.',
    'Bridgeline Digital',
    'FatWire',
    'FatWire',
    'FatWire',
    'Oracle',
    'Comtech Telecommunications Corp.',
    'Dealertrack Technologies, Inc.',
    'Cox Automotive Inc.',
    'PricewaterhouseCoopers'

]

In [14]:
# Start from an empty template so re-running this cell does not stack annotations.
reset_gold(fn)

# (section, label, values) triples, applied in the same order as before.
for section, label, values in (
    ("item_7", "MONEY", money_values),
    ("item_10", "PERSON", person_values),
    ("item_10", "TITLE", title_values),
    ("item_10", "ORG", org_values),
):
    add_many(fn, section, label, values)

removed = dedupe_annotations(fn)
print("Deduped:", removed)

show_gold(fn)

Gold templates ensured in: gold_annotations (created 1, existing 0)
Deduped: 0
FILE: 1378590_10K_2021_0001437749-21-028984.json
Company: Bridgeline Digital, Inc.
Total annotations: 278
{'label': 'ORG', 'section': 'item_10', 'start': 4657, 'end': 4693, 'text': 'Spectro Analytical Instruments, Inc.'}
{'label': 'ORG', 'section': 'item_10', 'start': 5502, 'end': 5533, 'text': 'Monotype Imaging Holdings, Inc.'}
{'label': 'ORG', 'section': 'item_10', 'start': 5923, 'end': 5944, 'text': 'Pitney Bowes Software'}
{'label': 'ORG', 'section': 'item_10', 'start': 5923, 'end': 5935, 'text': 'Pitney Bowes'}
{'label': 'ORG', 'section': 'item_10', 'start': 6190, 'end': 6197, 'text': 'MapInfo'}
{'label': 'ORG', 'section': 'item_10', 'start': 5973, 'end': 5985, 'text': 'Pitney Bowes'}
{'label': 'ORG', 'section': 'item_10', 'start': 6346, 'end': 6363, 'text': 'Coopers & Lybrand'}
{'label': 'ORG', 'section': 'item_10', 'start': 7104, 'end': 7126, 'text': 'Taglich Brothers, Inc.'}
{'label': 'ORG', 'section

In [15]:
fn = "1353499_10K_2020_0001344676-21-000004.json"

# MONEY (Item 7)
money_values = [
    '$30,000',
    '$10,000,000',
    '$.404',
    '$ 15,703,617',
    '$120 Billion',
    '$89,929',
    '$110,547',
    '20,618',
    '$0',
    '$20,800',
    '$ 20,800',
    '$59,200',
    '$32,754',
    '$26,446',
    '$342,000',
    '$450,000',
    '$108,000',
    '$1,497,818',
    '$0',
    '$0',
    '$82,972,471',
    '$18,236',
    '$100,000',
    '$200,000',
    '$100,000',
    '$200,000',
    '$960,000',
    '$436,373',
    '$89,655',
    '$0',
    '$0'
]

# PERSON (Item 10)
person_values = [
    'John Blaisure',
    'Greg Halpern',
    'Greg Halpern',
    'Greg Halpern',
    'Greg Halpern',
    'Greg Halpern'
]

# TITLE (Item 10)
title_values = [
    'President',
    'Chief Executive Officer',
    'Chairman',
    'President',
    'Chief Executive',
    'Officer',
    'Chief Financial Officer',
    'Chairman',
    'CFO',
    'Founder',
    'CEO',
    'Chief Executive Officer',
    'CEO',
    'Chairman',
    'President',
    'Chief Executive Officer',
    'Chief Financial Officer',
    'Chief Executive Officer',
    'Chief Financial Officer'
]

# ORG (Item 10)
org_values = [
    'MAX-D',
    'Max Sound Corporation',
    'Circle Group Internet, Inc.',
    'Circle Group Holdings Inc.',
    'Z-Trim Holdings Inc.',
    'People for Ultimate Kindness Toward All Living Creatures on Earth',
    'So Act Network',
    'So Act Network',
    'Max Sound Corporation',
    'Max Sound Corporation'
]

In [16]:
# Start from an empty template so re-running this cell does not stack annotations.
reset_gold(fn)

# (section, label, values) triples, applied in the same order as before.
for section, label, values in (
    ("item_7", "MONEY", money_values),
    ("item_10", "PERSON", person_values),
    ("item_10", "TITLE", title_values),
    ("item_10", "ORG", org_values),
):
    add_many(fn, section, label, values)

removed = dedupe_annotations(fn)
print("Deduped:", removed)

show_gold(fn)

Gold templates ensured in: gold_annotations (created 1, existing 0)
Deduped: 0
FILE: 1353499_10K_2020_0001344676-21-000004.json
Company: Max Sound Corp
Total annotations: 66
{'label': 'TITLE', 'section': 'item_10', 'start': 544, 'end': 551, 'text': 'Founder'}
{'label': 'TITLE', 'section': 'item_10', 'start': 840, 'end': 843, 'text': 'CEO'}
{'label': 'TITLE', 'section': 'item_10', 'start': 929, 'end': 952, 'text': 'Chief Executive Officer'}
{'label': 'TITLE', 'section': 'item_10', 'start': 1037, 'end': 1040, 'text': 'CEO'}
{'label': 'TITLE', 'section': 'item_10', 'start': 2162, 'end': 2170, 'text': 'Chairman'}
{'label': 'TITLE', 'section': 'item_10', 'start': 2172, 'end': 2181, 'text': 'President'}
{'label': 'TITLE', 'section': 'item_10', 'start': 2183, 'end': 2206, 'text': 'Chief Executive Officer'}
{'label': 'TITLE', 'section': 'item_10', 'start': 2211, 'end': 2234, 'text': 'Chief Financial Officer'}
{'label': 'TITLE', 'section': 'item_10', 'start': 3379, 'end': 3402, 'text': 'Chief E

In [17]:
fn = '1327567_10K_2021_0001327567-21-000029.json'

# MONEY (Item 7)
money_values = [
    '$4.3 billion',
    '$3.4 billion',
    '$1.1 billion',
    '$3.1 billion',
    '$ 5,024.0',
    '$ 3,810.2',
    '$ 3,789.4',
    '$ 4,302.2',
    '$ 4,256.1',
    '$ 3,408.4',
    '$ 2,899.6',
    '$ (304.1)',
    '$ (179.0)',
    '$ (54.1)',
    '$ 5,452.2',
    '$ 4,301.7',
    '$ 3,489.8',
    '$ 1,503.0',
    '$ 1,035.7',
    '$ 1,055.6',
    '$ 1,387.0',
    '$ 821.3',
    '$ 924.4',
    '$ 4,256.1',
    '$ 3,408.4',
    '$ 2,899.6',
    '1,196.1',
    '893.3',
    '590.2',
    '$ 5,452.2',
    '$ 4,301.7',
    '$ 3,489.8',
    '$ 1,503.0',
    '$ 1,035.7',
    '$ 1,055.6',
    '116.0',
    '214.4',
    '131.2',
    '$ 1,387.0',
    '$ 821.3',
    '$ 924.4',
    '$ (1,480.6)',
    '$ 288.0',
    '$ (1,825.9)',
    '$ (1,104.0)',
    '$ 673.0',
    '$ (773.9)',
    '$ 1,120.3',
    '$ 1,064.2',
    '$ 1,096.2',
    '3,135.8',
    '2,344.2',
    '1,803.4',
    '308.5',
    '294.4',
    '315.9',
    '966.4',
    '705.1',
    '492.5',
    '1,274.9',
    '999.5',
    '808.4',
    '2,981.2',
    '2,408.9',
    '2,091.2',
    '1,140.4',
    '768.1',
    '539.5',
    '1,753.8',
    '1,520.2',
    '1,344.0',
    '391.1',
    '299.6',
    '261.8',
    '3,285.3',
    '2,587.9',
    '2,145.3',
    '$ 1,898.8', 
    '$ 1,405.3',
    '$ 493.5',
    '$ 1,405.3',
    '$ 1,032.7',
    '$ 372.6',
    '1,237.0',
    '938.9',
    '298.1',
    '938.9',
    '770.7',
    '168.2',
    '$ 3,135.8',
    '$ 2,344.2',
    '$ 791.6',
    '$ 2,344.2',
    '$ 1,803.4',
    '$ 540.8',
    '$ 2,937.5',
    '$ 2,318.0',
    '$ 619.5',
    '$ 2,318.0',
    '$ 1,977.0',
    '$ 341.0',
    '817.3',
    '671.9',
    '145.4',
    '671.9',
    '568.6',
    '103.3',
    '501.3',
    '418.5',
    '82.8',
    '418.5',
    '354.0',
    '64.5',
    '$ 4,256.1',
    '$ 3,408.4',
    '$ 847.7',
    '$ 3,408.4',
    '$ 2,899.6',
    '$ 508.8',
    '$ 308.5',
    '$ 294.4',
    '$ 14.1',
    '$ 294.4',
    '$ 315.9',
    '$ (21.5)',
    '(304.1)',
    '(179.0)',
    '(54.1)',
    '(163.3)',
    '(88.7)',
    '(83.9)',
    '2.4',
    '35.9',
    '63.4',
    '(465.0)',
    '(231.8)',
    '(74.6)',
    '33.9',
    '35.2',
    '7.3',
    '$ (498.9)',
    '$ (267.0)',
    '$ (81.9)',
    '$ 6.2',
    '$ 5.7',
    '$ 5.6',
    '93.0',
    '77.7',
    '71.3',
    '428.9',
    '274.6',
    '186.8',
    '269.9',
    '214.5',
    '221.9',
    '128.9',
    '92.0',
    '102.1',
    '$ 926.9',
    '$ 664.5',
    '$ 587.7',
    '$ 1,120.3',
    '$ 1,064.2',
    '$ 56.1',
    '$ 1,064.2',
    '$ 1,096.2',
    ' (32.0)',
    '$ 966.4',
    '$ 705.1',
    '$ 261.3',
    '$ 705.1',
    '$ 492.5',
    '$ 212.6',
    '$97.1 million',
    '$412.4 million',
    '45.6 million',
    '$ 811.8',
    '$ 769.8',
    '$ 780.3',
    '2,169.4',
    '1,639.1',
    '1,310.9',
    '$ 2,981.2',
    '$ 2,408.9',
    '$ 2,091.2',
    '$2.0 billion',
    '$ 1,140.4',
    '$ 768.1',
    '$ 372.3',
    '$ 768.1',
    '$ 539.5',
    '$ 228.6',
    '$321.2 million',
    '$911.0 million',
    '$ 1,753.8',
    '$ 1,520.2',
    '$ 233.6',
    '$ 1,520.2',
    '$ 1,344.0',
    '$ 176.2',
    '$186.9 million',
    '$1.3 billion',
    '$ 391.1',
    '$ 299.6',
    '$ 91.5',
    '$ 299.6',
    '$ 261.8',
    '$ 37.8',
    '$48.9 million',
    '$244.0 million',
    '$ 163.3',
    '$ 88.7',
    '$ 74.6',
    '$ 88.7',
    '$ 83.9',
    '$ 4.8',
    '$ 2.4',
    '$ 35.9',
    '$ (33.5)',
    '$ 35.9',
    '$ 63.4',
    '$ (27.5)',
    '$ 33.9',
    '$ 35.2',
    '$ (1.3)',
    '$ 35.2',
    '$ 7.3',
    '$ 27.9',
    '$ (469.4)',
    '$ 2,437.5',
    '$ 1,874.2',
    '$ 2,958.0',
    '1,915.2',
    '1,344.2',
    '$ 3,789.4',
    '$ 4,302.2',
    '3.8 billion',
    '$942.3 million',
    '575.0 million',
    '575.0 million',
    '1.7 billion',
    '2.0 billion',
    '400.0 million',
    '350.0 million',
    '1.0 billion',
    '700.0 million',
    '1.7 billion',
    '323.9 million',
    '676.1 million',
    '1.0 billion',
    '1.0 billion',
    '$ 1,503.0',
    '$ 1,035.7',
    '$ 1,055.6',
    '(1,480.6)',
    '288.0',
    '(1,825.9)',
    '(1,104.0)',
    '673.0',
    '(773.9)',
    '$ (1,081.6)',
    '$ 1,996.7',
    '$ (1,544.2)',
    '1.5 billion',
    '467.3 million',
    '$1.5 billion',
    '$1.8 billion',
    '$288.0 million',
    '$1.1 billion',
    '$1.8 billion',
    '673.0 million',
    '$1.8 billion',
    '$ 1,692.0',
    '$ 1,692.0',
    '2,000.0',
    '2,000.0',
    '426.6',
    '78.8',
    '134.2',
    '116.1',
    '97.5',
    '1,831.7',
    '251.3',
    '609.0',
    '971.4',
    '$ 5,950.3',
    '$ 330.1',
    '$ 2,435.2',
    '$ 3,087.5',
    '97.5',
    '80.6 million'
]

# The three Item 10 lists below are intentionally empty: per the note in the
# cell that applies them, Item 10 of this filing is incorporated by reference
# to the proxy statement, so it contains no extractable entities.

# PERSON (Item 10)
person_values = [
 
]

# TITLE (Item 10)
title_values = [

]

# ORG (Item 10)
org_values = [

]

In [18]:
# Start from an empty template so re-running this cell does not stack annotations.
reset_gold(fn)

# Item 10 is incorporated by reference to the proxy statement, so there are no
# extractable entities; its three lists are empty and the calls add nothing.
for section, label, values in (
    ("item_7", "MONEY", money_values),
    ("item_10", "PERSON", person_values),
    ("item_10", "TITLE", title_values),
    ("item_10", "ORG", org_values),
):
    add_many(fn, section, label, values)

removed = dedupe_annotations(fn)
print("Deduped:", removed)

show_gold(fn)

Gold templates ensured in: gold_annotations (created 1, existing 0)
Deduped: 0
FILE: 1327567_10K_2021_0001327567-21-000029.json
Company: Palo Alto Networks Inc
Total annotations: 291
{'label': 'MONEY', 'section': 'item_7', 'start': 46282, 'end': 46294, 'text': '$1.8 billion'}
{'label': 'MONEY', 'section': 'item_7', 'start': 46677, 'end': 46686, 'text': '$ 1,692.0'}
{'label': 'MONEY', 'section': 'item_7', 'start': 46691, 'end': 46700, 'text': '$ 1,692.0'}
{'label': 'MONEY', 'section': 'item_7', 'start': 46750, 'end': 46757, 'text': '2,000.0'}
{'label': 'MONEY', 'section': 'item_7', 'start': 46762, 'end': 46769, 'text': '2,000.0'}
{'label': 'MONEY', 'section': 'item_7', 'start': 46800, 'end': 46805, 'text': '426.6'}
{'label': 'MONEY', 'section': 'item_7', 'start': 46806, 'end': 46810, 'text': '78.8'}
{'label': 'MONEY', 'section': 'item_7', 'start': 46811, 'end': 46816, 'text': '134.2'}
{'label': 'MONEY', 'section': 'item_7', 'start': 46817, 'end': 46822, 'text': '116.1'}
{'label': 'MONEY

In [19]:
fn = '1082324_10K_2020_0001140361-21-008678.json'

# MONEY (Item 7)
money_values = [
    '$368,000',
    '$302,400',
    '$1.20',
    '$1.80',
    '$439,700',
    '$302,400',
    '$41,300',
    '$96,000',
    '$454,034',
    '$502,600',
    '$1.20',
    '$502,800',
    '$0.84',
    '$36,300',
    '$324',
    '$1,790',
    '$8',
    '302,636',
    '$302,636',
    '$85',
    '$454,034',
    '$90,101',
    '8,830',
    '3,845',
    '$8,830',
    '$3,845',
    '45,812',
    '15,905',
    '$45,812',
    '$15,905',
    '$30,699',
    '$5,898',
    '$41,271',
    '108,288',
    '$108,288',
    '$92',
    '$108,221',
    '$307,452',
    '$19,573',
    '$107,989',
    '$192,908',
    '$28,348',
    '$3,135',
    '$2,394',
    '$100,000',
    '$50,000',
    '$21,964',
    '$4.41',
    '$4,627',
    '$139'

]

# The three Item 10 lists below are intentionally empty: per the note in the
# cell that applies them, Item 10 of this filing is incorporated by reference
# to the proxy statement, so it contains no extractable entities.

# PERSON (Item 10)
person_values = [
 
]

# TITLE (Item 10)
title_values = [

]

# ORG (Item 10)
org_values = [

]


In [20]:
# Start from an empty template so re-running this cell does not stack annotations.
reset_gold(fn)

# Item 10 is incorporated by reference to the proxy statement, so there are no
# extractable entities; its three lists are empty and the calls add nothing.
for section, label, values in (
    ("item_7", "MONEY", money_values),
    ("item_10", "PERSON", person_values),
    ("item_10", "TITLE", title_values),
    ("item_10", "ORG", org_values),
):
    add_many(fn, section, label, values)

removed = dedupe_annotations(fn)
print("Deduped:", removed)

show_gold(fn)

Gold templates ensured in: gold_annotations (created 1, existing 0)
Deduped: 0
FILE: 1082324_10K_2020_0001140361-21-008678.json
Company: VirnetX Holding Corp
Total annotations: 50
{'label': 'MONEY', 'section': 'item_7', 'start': 36141, 'end': 36148, 'text': '$30,699'}
{'label': 'MONEY', 'section': 'item_7', 'start': 36153, 'end': 36159, 'text': '$5,898'}
{'label': 'MONEY', 'section': 'item_7', 'start': 36385, 'end': 36392, 'text': '$41,271'}
{'label': 'MONEY', 'section': 'item_7', 'start': 36675, 'end': 36682, 'text': '108,288'}
{'label': 'MONEY', 'section': 'item_7', 'start': 36758, 'end': 36766, 'text': '$108,288'}
{'label': 'MONEY', 'section': 'item_7', 'start': 36800, 'end': 36803, 'text': '$92'}
{'label': 'MONEY', 'section': 'item_7', 'start': 36841, 'end': 36849, 'text': '$108,221'}
{'label': 'MONEY', 'section': 'item_7', 'start': 37739, 'end': 37747, 'text': '$307,452'}
{'label': 'MONEY', 'section': 'item_7', 'start': 37785, 'end': 37792, 'text': '$19,573'}
{'label': 'MONEY', 's

In [21]:
# Filing being annotated in this cell and the next one.
fn = "1064722_10K_2020_0001760319-21-000039.json"

# MONEY strings for Item 7 (15 entries), in the order they were annotated.
# Values without a leading "$" are kept exactly as entered.
money_values = [
    "$38,000", "$3", "$1,205,000", "$423,136", "$1,628,136",
    "$464", "$1,179,827", "374,691", "$159,463", "$82,980",
    "540,524", "63,632", "41,200", "$500", "$63,632",
]

# PERSON strings for Item 10 (11 entries), in annotation order.
# Repeated names are kept as separate entries — presumably one per mention in
# the text; TODO confirm against add_many.
# NOTE(review): "Mr. Patience Ogbozor" vs "Ms. Patience C. Ogbozor" — presumably
# both are copied verbatim from the filing; confirm before "correcting" either.
person_values = [
    "Mr. Frank I Igwealor",
    "Mr. Patience Ogbozor",
    "Bishop Christopher E Milton",
    "Dr. Solomon K.S. Mbagwu",
    "Solomon KN Mbagwu",
    "Frank Igwealor",
    "Ms. Patience C. Ogbozor",
    "Bishop Christopher E. Milton",
    "Dr. Solomon KN Mbagwu",
    "Frank I Igwealor",
    "Frank I Igwealor",
]

# TITLE strings for Item 10 (36 entries), in annotation order.
# Repeats are significant: the run output for this filing lists repeated values
# (e.g. 'President') at different offsets, so no entry here is redundant.
title_values = [
    "Chairman", "Director", "Chief Executive", "Financial Officer",
    "Director", "Director", "Director",
    "CPA", "CMA", "JD", "MBA", "MSRM",
    "Certified Financial Manager",
    "Certified Management Accountant",
    "Certified Public Accountant",
    "Attorney", "CPA", "CMA", "CFM",
    "Director", "President", "CEO", "Director",
    "Senior Pastor",
    "Jurisdictional Prelate Southern California Evangelistic",
    "Chairman of the Internal Audit Committee of the Board of Bishops",
    "Chairman", "MD", "Director",
    "Chair of our Audit Committee",
    "President", "Chief Executive Officer", "Chief Financial Officer",
    "President", "Chief Executive Officer", "Chief Financial Officer",
]

# ORG strings for Item 10 (10 entries), in annotation order.
# The last two organisations each appear twice — presumably one entry per
# mention in the text; TODO confirm against add_many.
org_values = [
    "Cannabinoid Biosciences",
    "Goldstein Franklin Inc.",
    "Kid Castle Educational Corporation",
    "Video Rivers Networks, Inc.",
    "Opportunity Zone Capital LLC",
    "New Haven Pharmacy",
    "The Church of God in Christ, Inc.",
    "The Church of God in Christ, Inc.",
    "GiveMePower Corporation",
    "GiveMePower Corporation",
]


In [22]:
# Apply the value lists prepared in the previous cell to the gold file `fn`.
# reset_gold / add_many / dedupe_annotations / show_gold are helpers defined
# earlier in the notebook.
reset_gold(fn)

# One add_many call per (section, label) pair, in the same order as before:
# Item 7 MONEY first, then the three Item 10 entity types.
for _section, _label, _values in (
    ("item_7", "MONEY", money_values),
    ("item_10", "PERSON", person_values),
    ("item_10", "TITLE", title_values),
    ("item_10", "ORG", org_values),
):
    add_many(fn, _section, _label, _values)

# Report what the dedupe pass returned, then display the resulting annotations.
removed = dedupe_annotations(fn)
print("Deduped:", removed)

show_gold(fn)

Gold templates ensured in: gold_annotations (created 1, existing 0)
Deduped: 0
FILE: 1064722_10K_2020_0001760319-21-000039.json
Company: GIVEMEPOWER CORP
Total annotations: 72
{'label': 'TITLE', 'section': 'item_10', 'start': 3605, 'end': 3613, 'text': 'Chairman'}
{'label': 'TITLE', 'section': 'item_10', 'start': 3877, 'end': 3879, 'text': 'MD'}
{'label': 'TITLE', 'section': 'item_10', 'start': 1625, 'end': 1633, 'text': 'Director'}
{'label': 'TITLE', 'section': 'item_10', 'start': 5699, 'end': 5727, 'text': 'Chair of our Audit Committee'}
{'label': 'TITLE', 'section': 'item_10', 'start': 10601, 'end': 10610, 'text': 'President'}
{'label': 'TITLE', 'section': 'item_10', 'start': 10612, 'end': 10635, 'text': 'Chief Executive Officer'}
{'label': 'TITLE', 'section': 'item_10', 'start': 10640, 'end': 10663, 'text': 'Chief Financial Officer'}
{'label': 'TITLE', 'section': 'item_10', 'start': 12945, 'end': 12954, 'text': 'President'}
{'label': 'TITLE', 'section': 'item_10', 'start': 12956, '

In [23]:
# Filing being annotated in this cell and the next one; the run output for this
# file reports "Company: HUBSPOT INC".
fn = '1404655_10K_2020_0001564590-21-006083.json'

# MONEY (Item 7)
# Exact strings passed to add_many(fn, "item_7", "MONEY", ...) in the next cell.
# The list holds 298 entries and the run output reports "Total annotations: 298"
# with "Deduped: 0", so repeated values are not redundant — presumably each
# entry corresponds to one occurrence in the text; TODO confirm against add_many.
# NOTE(review): entries such as '(50,828' start with "(" but carry no closing
# ")", and many values have no "$" — presumably copied verbatim from the filing's
# tables; confirm this is the intended span convention before normalising.
# Do not reorder or deduplicate this list by hand.
money_values = [
    '$883.0 million',
    '$674.9 million',
    '$513.0 million',
    '$85.0 million',
    '$53.7 million',
    '$63.8 million',
    '9,582',
    '9,920',
    '9,904',
    '$10',
    '$10',
    '853,025',
    '646,266',
    '487,450',
    '30,001',
    '28,594',
    '25,530',
    '883,026',
    '674,860',
    '512,980',
    '130,685',
    '98,510',
    '69,718',
    '36,274',
    '31,448',
    '30,639',
    '166,959',
    '129,958',
    '100,357',
    '716,067',
    '544,902',
    '412,623',
    '205,589',
    '158,237',
    '117,603',
    '452,081',
    '340,685',
    '267,444',
    '109,225',
    '92,971',
    '75,834',
    '766,895',
    '591,893',
    '460,881',
    '(50,828',
    '(46,991',
    '(48,258',
    '7,773',
    '19,429',
    '9,176',
    '(37,049',
    '(22,818',
    '(21,386',
    '(711',
    '(393',
    '(1,492',
    '(29,987',
    '(3,782',
    '(13,702',
    '(80,815',
    '(50,773',
    '(61,960',
    '(4,216',
    '(2,973',
    '(1,868',
    '(85,031',
    '(53,746',
    '(63,828',
    '853,025',
    '646,266',
    '206,759',
    '30,001',
    '28,594',
    '1,407',
    '883,026',
    '674,860',
    '208,166',
    '$9,920',
    '$9,582',
    '166,959',
    '129,958',
    '37,001',
    '716,067',
    '544,902',
    '171,165',
    '130,685',
    '98,510',
    '32,175',
    '21,990',
    '5,238',
    '4,834',
    '(861',
    '32,175',
    '36,274',
    '31,448',
    '4,826',
    '4,826',
    '4,826',
    '205,589',
    '158,237',
    '47,352',
    '39,791',
    '7,561',
    '47,352',
    '452,081',
    '340,685',
    '111,396',
    '65,646',
    '17,536',
    '14,637',
    '13,577',
    '111,396',
    '109,225',
    '92,971',
    '16,254',
    '7,008',
    '5,884',
    '3,362',
    '16,254',
    '7,773',
    '19,429',
    '(11,656',
    '(37,049',
    '(22,818',
    '(14,231',
    '(711',
    '(393',
    '(318',
    '$10.5 million',
    '(4,216',
    '(2,973',
    '(1,243',
    '646,266',
    '487,450',
    '158,816',
    '28,594',
    '25,530',
    '3,064',
    '674,860',
    '512,980',
    '161,880',
    '$9,904',
    '$9,920',
    '129,958',
    '100,357',
    '29,601',
    '544,902',
    '412,623',
    '132,279',
    '98,510',
    '69,718',
    '28,792',
    '15,857',
    '7,234',
    '2,864',
    '1,808',
    '1,029',
    '28,792',
    '31,448',
    '30,639',
    '158,237',
    '117,603',
    '40,634',
    '35,942',
    '4,692',
    '40,634',
    '340,685',
    '267,444',
    '73,241',
    '49,875',
    '9,900',
    '6,686',
    '6,780',
    '73,241',
    '92,971',
    '75,834',
    '17,137',
    '11,619',
    '2,961',
    '2,557',
    '17,137',
    '19,429',
    '9,176',
    '10,253',
    '(22,818',
    '(21,386',
    '(1,432',
    '(393',
    '(1,492',
    '1,099',
    '(2,973',
    '(1,868',
    '(1,105',
    '378,123',
    '269,670',
    '111,489',
    '1,011,420',
    '787,235',
    '475,409',
    '88,913',
    '118,973',
    '84,851',
    '(215,567',
    '(316,194',
    '(71,230',
    '222,460',
    '359,342',
    '12,778',
    '$83.7 million',
    '$85.0 million',
    '$49.0 million',
    '$2.2 million',
    '$3.7 million',
    '$37.1 million',
    '$121.5 million',
    '$10.5 million',
    '$24.9 million',
    '$72.6 million',
    '$26.0 million',
    '$3.7 million',
    '$31.4 million',
    '$31.6 million',
    '$30.0 million',
    '$19.3 million',
    '$17.0 million',
    '$53.7 million',
    '$14.2 million',
    '$28.8 million',
    '$97.8 million',
    '$21.8 million',
    '$49.3 million',
    '$7.8 million',
    '$3.9 million',
    '$22.7 million',
    '$15.8 million',
    '$15.4 million',
    '$9.7 million',
    '$3.3 million',
    '$63.8 million',
    '$6.8 million',
    '$23.4 million',
    '$76.3 million',
    '$2.3 million',
    '$20.3 million',
    '$49.3 million',
    '$11.9 million',
    '$5.8 million',
    '$3.9 million',
    '$3.3 million',
    '$17.7 million',
    '$23.9 million',
    '$1.5 billion',
    '$37.3 million',
    '$2.5 million',
    '$21.6 million',
    '$1.4 billion',
    '$10.9 million',
    '$1.3 billion',
    '$40.4 million',
    '$13.5 million',
    '$23.3 million',
    '$0.6 million',
    '$1.1 billion',
    '$681.6 million',
    '$22.3 million',
    '$11.2 million',
    '$0.5 million',
    '$644.4 million',
    '$450.1 million',
    '$363.6 million',
    '$30.4 million',
    '$236.0 million',
    '$327.5 million',
    '$50.6 million',
    '$7.4 million',
    '$342.6 million',
    '$23.6 million',
    '$6.2 million',
    '$0.3 million',
    '$0.3 million',
    '$21.6 million',
    '$8.0 million',
    '$0.7 million',
    '$90.8 million',
    '388,962',
    '51,933',
    '100,443',
    '97,303',
    '139,283',
    '102,397',
    '31,138',
    '71,259',
    '491,359',
    '83,071',
    '171,702',
    '97,303',
    '139,283',
    '$3.4 million'
 ]

# Item 10 gold values (PERSON, TITLE, ORG) for the current filing.
# All three stay empty: the note in the cell that applies them says Item 10 is
# incorporated by reference to the proxy statement, so nothing is extractable.
# Each name is bound to its own, separate list object.
person_values, title_values, org_values = [], [], []


In [24]:
# Apply the value lists prepared in the previous cell to the gold file `fn`.
# reset_gold / add_many / dedupe_annotations / show_gold are helpers defined
# earlier in the notebook.
reset_gold(fn)

# Item 10 is incorporated by reference to the proxy statement, so there are no
# extractable entities: the PERSON / TITLE / ORG lists are empty for this filing.
for _section, _label, _values in (
    ("item_7", "MONEY", money_values),
    ("item_10", "PERSON", person_values),
    ("item_10", "TITLE", title_values),
    ("item_10", "ORG", org_values),
):
    add_many(fn, _section, _label, _values)

# Report what the dedupe pass returned, then display the resulting annotations.
removed = dedupe_annotations(fn)
print("Deduped:", removed)

show_gold(fn)

Gold templates ensured in: gold_annotations (created 1, existing 0)
Deduped: 0
FILE: 1404655_10K_2020_0001564590-21-006083.json
Company: HUBSPOT INC
Total annotations: 298
{'label': 'MONEY', 'section': 'item_7', 'start': 50790, 'end': 50802, 'text': '$0.3 million'}
{'label': 'MONEY', 'section': 'item_7', 'start': 50870, 'end': 50882, 'text': '$0.3 million'}
{'label': 'MONEY', 'section': 'item_7', 'start': 51042, 'end': 51055, 'text': '$21.6 million'}
{'label': 'MONEY', 'section': 'item_7', 'start': 51160, 'end': 51172, 'text': '$8.0 million'}
{'label': 'MONEY', 'section': 'item_7', 'start': 51270, 'end': 51282, 'text': '$0.7 million'}
{'label': 'MONEY', 'section': 'item_7', 'start': 58772, 'end': 58785, 'text': '$90.8 million'}
{'label': 'MONEY', 'section': 'item_7', 'start': 58984, 'end': 58991, 'text': '388,962'}
{'label': 'MONEY', 'section': 'item_7', 'start': 58992, 'end': 58998, 'text': '51,933'}
{'label': 'MONEY', 'section': 'item_7', 'start': 58999, 'end': 59006, 'text': '100,44