## Get Data

In [9]:
import zipfile, glob
from pathlib import Path
import requests

In [28]:
# Download the SEC "Financial Statement and Notes" archive into ./data,
# skipping the download when the zip is already present on disk.
url = 'https://www.sec.gov/files/dera/data/financial-statement-notes-data-sets/2025_02_notes.zip'
data_path = Path('data')
# Local target: data/<last path component of the URL>
filename = data_path / url.split('/')[-1]

if not data_path.exists():
    print('Creating directory')
    # parents/exist_ok keep this safe if data_path is later changed to a
    # nested path or the directory appears between the check and the call.
    data_path.mkdir(parents=True, exist_ok=True)
if not filename.exists():
    print('Downloading...', url)
    # NOTE(review): this sends a browser-like User-Agent; SEC guidance
    # reportedly asks automated clients to identify themselves - confirm.
    headers = {
            'Host': 'www.sec.gov', 'Connection': 'close',
            'Accept': 'application/json, text/javascript, */*; q=0.01', 'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
         }
    # A timeout prevents the kernel from hanging forever on a stalled connection.
    r = requests.get(url, headers=headers, allow_redirects=True, timeout=60)
    # Fail loudly on 4xx/5xx: otherwise an error page would be saved under the
    # .zip name and the exists() guard above would skip every later retry.
    r.raise_for_status()
    # Context manager guarantees the file handle is flushed and closed.
    with open(filename, 'wb') as f:
        f.write(r.content)
print('...DONE')


Downloading... https://www.sec.gov/files/dera/data/financial-statement-notes-data-sets/2025_02_notes.zip
...DONE


In [29]:
# Unpack the downloaded archive into the data directory, then list every
# path found under it (recursively) so the extracted files are visible.
if filename.exists():
    extract_dir = 'data'
    print('Extracting files...')
    with zipfile.ZipFile(filename) as archive:
        archive.extractall(path=extract_dir)
    # '**' with recursive=True matches the directory itself and all descendants.
    listing_pattern = extract_dir + '/**'
    for extracted_path in glob.iglob(listing_pattern, recursive=True):
        print(extracted_path)



Extracting files...
data/
data/readme.htm
data/2025_02_notes.zip
data/tag.tsv
data/pre.tsv
data/num.tsv
data/dim.tsv
data/sub.tsv
data/txt.tsv
data/notes-metadata.json
data/ren.tsv
data/cal.tsv


## Observe Data

In [30]:
import pandas as pd

In [33]:
# Load the tab-separated cal.tsv into a DataFrame and preview its first rows.
# read_table uses a tab delimiter by default.
cal_df = pd.read_table('data/cal.tsv')
cal_df.head()

Unnamed: 0,adsh,grp,arc,negative,ptag,pversion,ctag,cversion
0,0001504461-25-000004,12,5,1,AccruedLiabilitiesCurrent,us-gaap/2023,TaxesPayableCurrent,us-gaap/2023
1,0000769520-25-000009,3,1,1,AccruedLiabilitiesCurrent,us-gaap/2023,ContractWithCustomerLiabilityCurrent,us-gaap/2023
2,0001504461-25-000004,12,2,1,AccruedLiabilitiesCurrent,us-gaap/2023,DistributionPayable,us-gaap/2023
3,0001558370-25-001736,19,26,1,Assets,us-gaap/2023,PropertyPlantAndEquipmentAndFinanceLeaseRightO...,us-gaap/2023
4,0001025835-25-000039,7,2,1,Assets,us-gaap/2023,PropertyPlantAndEquipmentNet,us-gaap/2023


In [34]:
# Load the tab-separated dim.tsv into a DataFrame and preview its first rows.
# read_table uses a tab delimiter by default.
dim_df = pd.read_table('data/dim.tsv')
dim_df.head()

Unnamed: 0,dimhash,segments,segt
0,0x0942d94b9d9266bf273a2c5123d68019,BalanceSheetLocation=OtherAccruedLiabilities;D...,0
1,0xf48392632836a4db259a7d67c3a4f808,RangesOfExercisePricesForOutstandingShareOptio...,0
2,0x09d3f28b05928a485eb5ca0cf40bc7d0,BusinessSegments=LargeAccountSegment;ProductOr...,0
3,0xb0703db550a95f6c213269cbf1bed75e,MajorPropertyClassUsefulLife=Furnitureequipmen...,0
4,0x89eebd7929107c732859066c1c17c60e,DebtInstrument=SeniorUnsecuredNotesDueMay2023;,0


In [35]:
# Load the tab-separated pre.tsv into a DataFrame and preview its first rows.
# read_table uses a tab delimiter by default.
pre_df = pd.read_table('data/pre.tsv')
pre_df.head()

Unnamed: 0,adsh,report,line,stmt,inpth,tag,version,prole,plabel,negating
0,0001104659-25-008644,1,2,,0,AmendmentFlag,dei/2023,label,Amendment Flag,0
1,0000826154-25-000036,1,23,,0,AmendmentFlag,dei/2023,terseLabel,Amendment Flag,0
2,0001670541-25-000021,1,25,,0,AmendmentFlag,dei/2023,terseLabel,Amendment Flag,0
3,0001437749-25-002597,1,22,,0,AmendmentFlag,dei/2023,label,Amendment Flag,0
4,0001589526-25-000017,1,23,,0,AmendmentFlag,dei/2023,terseLabel,Amendment Flag,0


In [36]:
# Load the tab-separated ren.tsv into a DataFrame and preview its first rows.
# read_table uses a tab delimiter by default.
ren_df = pd.read_table('data/ren.tsv')
ren_df.head()

Unnamed: 0,adsh,report,rfile,menucat,shortname,longname,roleuri,parentroleuri,parentreport,ultparentrpt
0,0001628280-25-003396,1,H,C,Cover,0000001 - Document - Cover,http://daveandbusters.com/role/Cover,,,
1,0001493152-25-004617,1,H,C,Cover,00000001 - Document - Cover,http://fr8technologies.com/role/Cover,,,
2,0001493152-25-004615,1,H,C,Cover,00000001 - Document - Cover,http://microvision.com/role/Cover,,,
3,0001493152-25-004614,1,H,C,Cover,00000001 - Document - Cover,http://safetyshotofficial.com/role/Cover,,,
4,0001493152-25-004609,1,H,C,Cover,00000001 - Document - Cover,http://aimimmuno.com/role/Cover,,,


In [37]:
# Load the tab-separated sub.tsv into a DataFrame and preview its first rows.
# read_table uses a tab delimiter by default.
sub_df = pd.read_table('data/sub.tsv')
sub_df.head()

Unnamed: 0,adsh,cik,name,sic,countryba,stprba,cityba,zipba,bas1,bas2,...,accepted,prevrpt,detail,instance,nciks,aciks,pubfloatusd,floatdate,floataxis,floatmems
0,0000012927-25-000015,12927,BOEING CO,3721.0,US,VA,ARLINGTON,22202,929 LONG BRIDGE DRIVE,,...,2025-02-03 14:38:00.0,0,1,ba-20241231_htm.xml,1,,112000000000.0,20240630.0,,1.0
1,0000017313-25-000009,17313,CAPITAL SOUTHWEST CORP,,US,TX,DALLAS,75225,8333 DOUGLAS AVE,SUITE 1100,...,2025-02-03 16:06:00.0,0,0,cswc-20250203_htm.xml,1,,,,,
2,0000021076-25-000010,21076,CLOROX CO /DE/,2842.0,US,CA,OAKLAND,94612-1888,THE CLOROX COMPANY,1221 BROADWAY,...,2025-02-03 16:12:00.0,0,0,clx-20250203_htm.xml,1,,,,,
3,0000021076-25-000013,21076,CLOROX CO /DE/,2842.0,US,CA,OAKLAND,94612-1888,THE CLOROX COMPANY,1221 BROADWAY,...,2025-02-03 16:45:00.0,0,1,clx-20241231_htm.xml,1,,,,,
4,0000035527-25-000012,35527,FIFTH THIRD BANCORP,6022.0,US,OH,CINCINNATI,45263,38 FOUNTAIN SQ PLZ,FIFTH THIRD CENTER,...,2025-02-03 08:35:00.0,0,0,fitb-20250203_htm.xml,1,,,,,


In [38]:
# Load the tab-separated tag.tsv into a DataFrame and preview its first rows.
# read_table uses a tab delimiter by default.
tag_df = pd.read_table('data/tag.tsv')
tag_df.head()

Unnamed: 0,tag,version,custom,abstract,datatype,iord,crdr,tlabel,doc
0,A.5000EuroDenominatedNotesDue2033Member,0000059478-25-000067,1,1,member,,,.5000% Euro Denominated Notes Due 2033 [Member],.5000% Euro Denominated Notes Due 2033
1,A.C.SolucoesParaPiscinasLda.Member,0000945841-25-000032,1,1,member,,,"A.C. Solucoes para Piscinas, Lda. [Member]","A.C. Solucoes para Piscinas, Lda."
2,A.NattermannCie.GmbHMember,0001121404-25-000010,1,1,member,,,A. Nattermann & Cie. GmbH [Member],A. Nattermann & Cie. GmbH [Member]
3,A.PacificMember,0000021344-25-000011,1,1,member,,,A. Pacific [Member],A component of an entity for which there is an...
4,A0.0007.500NotesDue20252028Member,0001628280-25-009007,1,1,member,,,0.000% 7.500% Notes Due 2025/2028 [Member],0.000% 7.500% Notes Due 2025/2028


In [39]:
# Load the tab-separated txt.tsv into a DataFrame and preview its first rows.
# read_table uses a tab delimiter by default.
txt_df = pd.read_table('data/txt.tsv')
txt_df.head()

Unnamed: 0,adsh,tag,version,ddate,qtrs,iprx,lang,dcml,durp,datp,dimh,dimn,coreg,escaped,srclen,txtlen,footnote,footlen,context,value
0,0000004127-25-000010,EntityCentralIndexKey,dei/2024,20241231,1,0,en-US,32767,0.013699,4.0,0x00000000,0,,0,10,10,,0,c-1,0000004127
1,0000005513-25-000002,LocalPhoneNumber,dei/2024,20250131,0,0,en-US,32767,0.0,-4.0,0x00000000,0,,0,8,8,,0,c-1,294-1011
2,0000005513-25-000015,StockholdersEquityNoteDisclosureTextBlock,us-gaap/2024,20241231,4,0,en-US,32767,0.0,0.0,0x00000000,0,,1,50371,4969,,0,c-1,Common Stock As part of our capital deployment...
3,0000006201-25-000010,FairValueAssetsMeasuredOnRecurringBasisTextBlock,us-gaap/2024,20241231,4,0,en-US,32767,0.0,0.0,0x00000000,0,,1,51969,1940,,0,c-1,Assets measured at fair value on a recurring b...
4,0000016868-25-000011,DebtInstrumentMaturityDate,us-gaap/2024,20241231,4,0,en-US,32767,0.0,0.0,0x3eb07b98254e11d113e6e3d4fd075180,1,,0,10,10,,0,c-160,2033-11-01
