In [38]:
!pip install beautifulsoup4



**index**

In [48]:
# Shell escape: list the contents of the Item 4.02 structured-data directory
# (the path is relative, so it is resolved from the kernel's working directory).
!ls ../../data/SEC-API.io/form-8k-filings/item-4.02-structured-data/

2004-structured-data.jsonl  2014-structured-data.jsonl
2005-structured-data.jsonl  2015-structured-data.jsonl
2006-structured-data.jsonl  2016-structured-data.jsonl
2007-structured-data.jsonl  2017-structured-data.jsonl
2008-structured-data.jsonl  2018-structured-data.jsonl
2009-structured-data.jsonl  2019-structured-data.jsonl
2010-structured-data.jsonl  2020-structured-data.jsonl
2011-structured-data.jsonl  2021-structured-data.jsonl
2012-structured-data.jsonl  2022-structured-data.jsonl
2013-structured-data.jsonl  2023-structured-data.jsonl


**13D-13G Filings**

In [39]:
import gzip
import json
import os # For creating dummy files
from bs4 import BeautifulSoup

def read_json_file(filepath):
    """
    Read a standard JSON file and return the parsed Python object.

    Args:
        filepath: Path to a UTF-8 encoded file holding one JSON document.

    Returns:
        The parsed object (dict, list, or other JSON value), or None when the
        file does not exist or its content is not valid JSON. A message is
        printed in both failure cases.
    """
    # open() itself is the call that raises FileNotFoundError, so it must be
    # inside the try block; a handler placed within the `with` body can never
    # fire for a missing file.
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"File not found: '{filepath}'")
        return None
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from file '{filepath}'. Error: {e}")
        return None

def read_jsonl_line_by_line(filepath):
    """
    Generator over the records of a JSON Lines file.

    Each non-blank line is stripped of surrounding whitespace and parsed as
    one JSON value, which is yielded to the caller. A line that fails to
    parse is reported with a printed message and skipped; iteration then
    continues with the next line.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            text = raw_line.strip()
            # Blank (or whitespace-only) lines carry no record.
            if not text:
                continue
            try:
                yield json.loads(text)
            except json.JSONDecodeError as err:
                print(f"Error decoding JSON on line: {text}. Error: {err}")

def read_single_json_gz(filepath):
    """
    Load one JSON document (object or array) from a gzip-compressed file.

    Returns the parsed value on success. On any failure a message is printed
    and None is returned: a missing file and malformed JSON each get their own
    message, and every other exception falls through to a generic one.
    """
    try:
        # 'rt' makes gzip decompress and decode to text, so json.load can
        # consume the stream directly.
        with gzip.open(filepath, 'rt', encoding='utf-8') as gz_stream:
            parsed = json.load(gz_stream)
    except FileNotFoundError:
        print(f"Error: File not found at '{filepath}'")
        return None
    except json.JSONDecodeError as err:
        print(f"Error decoding JSON from '{filepath}': {err}")
        return None
    except Exception as err:
        print(f"An unexpected error occurred: {err}")
        return None
    return parsed

def pretty_print_html(filepath):
    """
    Parse an HTML file with BeautifulSoup's 'html.parser' backend and print
    the prettified markup to stdout. Returns nothing.
    """
    with open(filepath, 'r', encoding='utf-8') as html_file:
        # The whole document is parsed here, while the file is still open.
        formatted = BeautifulSoup(html_file, 'html.parser').prettify()
    print(formatted)

In [14]:
# Example Usage:
file_path = '../../data/SEC-API.io/13d-13g-filings/13d-1994-2024.jsonl'

# You can also store them in a list if the file is not too large
all_records = list(read_jsonl_line_by_line(file_path))
print(f"Number of JSON objects restored: {len(all_records)}")

Number of JSON objects restored: 196333


In [8]:
# Bare expression: display the first parsed record to inspect its fields.
all_records[0]

{'id': '81cca9fe3afe8faa5e5cec80ce0af64d',
 'accessionNo': '0000950109-94-002419',
 'formType': 'SC 13D/A',
 'filedAt': '1994-12-30T00:00:00-05:00',
 'filers': [{'cik': '50957', 'name': 'INTERCO INC (Subject)'},
  {'cik': '916151', 'name': 'APOLLO INTERCO PARTNERS L P (Filed by)'}],
 'nameOfIssuer': 'INTERCO INCORPORATED -',
 'titleOfSecurities': 'Common Stock -',
 'cusip': ['458507100'],
 'eventDate': '1994-12-23',
 'schedule13GFiledPreviously': True,
 'owners': [{'name': 'Apollo Interco Partners, L.P. -',
   'memberOfGroup': {'a': False, 'b': False},
   'sourceOfFunds': ['SC'],
   'place': 'Delaware -',
   'soleVotingPower': 0,
   'sharedVotingPower': 0,
   'soleDispositivePower': 0,
   'sharedDispositivePower': 0,
   'aggregateAmountOwned': 0,
   'amountExcludesCertainShares': False,
   'amountAsPercent': 0,
   'typeOfReportingPerson': ['PN'],
   'legalProceedingsDisclosureRequired': False}]}

**13F Filings**

In [17]:
# A single 13F cover-page file; the whole file is parsed as one JSON document.
file_path = '../../data/SEC-API.io/13f-filings/13f-cover-pages-2013-2023/1000097/0000919574-13-005176.json'

# read_json_file returns the parsed object (None if the JSON cannot be decoded).
json_obj = read_json_file(file_path)

# Bare last expression so the notebook displays the parsed object.
json_obj

{'headerData': {'submissionType': '13F-HR',
  'filerCik': 1000097,
  'periodOfReport': '06-30-2013',
  'accessionNo': '0000919574-13-005176',
  'filedAt': '2013-08-14T18:14:43-04:00'},
 'formData': {'coverPage': {'reportCalendarOrQuarter': '06-30-2013',
   'isAmendment': False,
   'filingManager': {'name': 'KINGDON CAPITAL MANAGEMENT, L.L.C.',
    'address': {'ns1:street1': '152 West 57th Street',
     'ns1:street2': '50th Floor',
     'ns1:city': 'New York',
     'ns1:stateOrCountry': 'NY',
     'ns1:zipCode': 10019}},
   'reportType': '13F HOLDINGS REPORT',
   'form13FFileNumber': '028-04575',
   'provideInfoForInstruction5': 'N'},
  'signatureBlock': {'name': 'William Walsh',
   'title': 'Chief Financial Officer',
   'phone': '212-333-0100',
   'signature': '/s/ William Walsh',
   'city': 'New York',
   'stateOrCountry': 'NY',
   'signatureDate': '08-14-2013'},
  'summaryPage': {'otherIncludedManagersCount': 3,
   'tableEntryTotal': 195,
   'tableValueTotal': 1690760,
   'otherManag

**filing metadata**

In [28]:
# Gzip-compressed JSON file of filing metadata
# (presumably January 1994, judging by the file name; confirm against the data).
file_path = "../../data/SEC-API.io/filing-metadata/1994/1994-01.json.gz"

# read_single_json_gz returns None on any read/parse error; in that case the
# len() and [0] below raise TypeError.
json_obj = read_single_json_gz(file_path)
print(f"Number of JSON objects restored: {len(json_obj)}\n\n")
print(json_obj[0])

Number of JSON objects restored: 3044


{'ticker': '', 'formType': '35-CERT', 'accessionNo': '0000007323-94-000008', 'cik': '96035', 'companyName': 'SYSTEM FUELS INC', 'linkToFilingDetails': 'https://www.sec.gov/Archives/edgar/data/96035/', 'description': 'Form 35-CERT - Certificate, terms and conditions [Rule 24]', 'linkToTxt': 'https://www.sec.gov/Archives/edgar/data/96035/0000007323-94-000008.txt', 'filedAt': '1994-01-31T00:00:00-05:00', 'documentFormatFiles': [{'sequence': '1', 'size': '18199', 'documentUrl': 'https://www.sec.gov/Archives/edgar/data/96035/', 'description': 'FILE NO. 70-7668', 'type': '35-CERT'}, {'sequence': '\xa0', 'size': '19530', 'documentUrl': 'https://www.sec.gov/Archives/edgar/data/96035/0000007323-94-000008.txt', 'description': 'Complete submission text file', 'type': '\xa0'}], 'entities': [{'fiscalYearEnd': '1231', 'stateOfIncorporation': 'LA', 'act': '35', 'cik': '96035', 'fileNo': '070-07668', 'companyName': 'SYSTEM FUELS INC (Filer)', 'type': '35-CERT', 

**8-K Filings**

In [41]:
# One extracted 8-K section stored as an HTML fragment
# (the file name suggests Item 7.01; confirm against the printed content).
file_path = "../../data/SEC-API.io/filing-sections/8-K/1000045/000095017021001640-item7-1.html"

# Parse the fragment with BeautifulSoup and print the re-indented markup.
pretty_print_html(file_path)

<span style="background-color:rgba(0,0,0,0);color:rgba(0,0,0,1);white-space:pre-wrap;font-weight:bold;font-size:10.0pt;font-family:Times New Roman;">
 Item 7.01 Regulation FD Disclosure
</span>
<span style="color:rgba(0,0,0,1);white-space:pre-wrap;font-weight:bold;font-size:10.0pt;font-family:Times New Roman;">
</span>
<p style="text-indent:0.0%;font-size:10.0pt;margin-top:6.0pt;line-height:1.3;font-family:Times New Roman;margin-bottom:0.0pt;text-align:left;">
 <span style="background-color:rgba(0,0,0,0);color:rgba(0,0,0,1);white-space:pre-wrap;font-weight:normal;font-size:10.0pt;font-family:Times New Roman;">
  On September 2, 2021, the Company used an investor slide deck at its Annual General Meeting of Shareholders, which is attached hereto as Exhibit 99.1.
 </span>
</p>
<p style="text-indent:0.0%;font-size:10.0pt;margin-top:6.0pt;line-height:1.3;font-family:Times New Roman;margin-bottom:0.0pt;text-align:left;">
 <span style="white-space:pre-wrap;font-size:10.0pt;font-family:Times N

**8-K Item 4.02 Structured Data**

In [53]:
# Item 4.02 structured data for 2004, stored as JSON Lines (one object per line).
file_path = "../../data/SEC-API.io/form-8k-filings/item-4.02-structured-data/2004-structured-data.jsonl"

# Drain the generator into a list so the records can be counted and indexed.
json_list = list(read_jsonl_line_by_line(file_path))
print(f"{len(json_list)} many JSON objects are restored.\n\n")
print(json_list[0])

172 many JSON objects are restored.


{'id': 'e05640114bc8c08a0407d75811e84ab8', 'accessionNo': '0000898080-04-000677', 'formType': '8-K', 'filedAt': '2004-12-30T17:01:53-05:00', 'periodOfReport': '2004-12-30', 'cik': '1064122', 'ticker': '', 'companyName': 'SCOTTISH RE GROUP LTD', 'items': ['Item 4.02: Non-Reliance on Previously Issued Financial Statements or a Related Audit Report or Completed Interim Review'], 'item4_02': {'keyComponents': "Scottish Re Group Limited determined that its International Segment had incorrectly reported premiums earned, claims and other policy benefits, acquisition costs and other insurance expenses and related income tax benefits in the quarters ended June 30, 2004 and September 30, 2004. The errors were made in the process of compiling income statement information on the accrual of premiums and resulted from incorrect references within the spreadsheets used to calculate these accruals. The errors were detected by management as part of their ongoing doc