# Example: Apple 2020

In [1]:
## Loading packages

from openesef.base.pool import Pool, const
from openesef.engines import tax_reporter
from openesef.taxonomy.taxonomy import Taxonomy
#from openesef.base.fbase import XmlFileBase
from openesef.edgar.edgar import EG_LOCAL
from openesef.edgar.stock import Stock
from openesef.edgar.filing import Filing
from openesef.util.parse_concepts import *
#from openesef.taxonomy.linkbase import Linkbase
from openesef.instance.instance import Instance
import re
from lxml import etree as lxml_etree
from io import StringIO, BytesIO
import traceback
import fs
import logging 




In [2]:
#set logging
from openesef.util.util_mylogger import setup_logger #util_mylogger


if __name__=="__main__":
    logger = setup_logger("main", logging.DEBUG, log_dir="/tmp/log/")
else:
    logger = logging.getLogger("main.openesf.try") 

In [3]:
egl = EG_LOCAL('/text/edgar')
# Create an in-memory filesystem



In [4]:
stock = Stock('AAPL', egl = egl); #self = stock
filing = stock.get_filing(period='annual', year=2020) #self = filing
#filing = Filing(url="Archives/edgar/data/1318605/0001564590-20-004475.txt", egl = egl)

2025-03-02 22:59:42,455 - main.openesf.edgar - PID:55007 - DEBUG - cik for AAPL is 320193
2025-03-02 22:59:42,872 - main.openesf.edgar - PID:55007 - DEBUG - getting data at https://www.sec.gov/Archives/edgar/full-index/2020/index.json
2025-03-02 22:59:43,730 - main.openesf.edgar - PID:55007 - DEBUG - No annual filing info found for year=2020 quarter=4. Finding latest.
2025-03-02 22:59:43,840 - main.openesf.edgar - PID:55007 - DEBUG - Loaded filing from cache: /text/edgar/10k-bycik/320193/0000320193-20-000096/0000320193-20-000096.txt.gz
2025-03-02 22:59:43,841 - main.openesf.edgar - PID:55007 - DEBUG - Processing SGML from cache: https://www.sec.gov/Archives/edgar/data/320193/0000320193-20-000096.txt


In [5]:
entry_points = []
memfs = fs.open_fs('mem://')
for key, filename in filing.xbrl_files.items():
    logger.info(f"{key}, {filing.documents[filename].type}, {filename}")
    content = filing.documents[filename].doc_text.data #.get("<XML>", "")
    content = list(content.values())[0] if type(content) == dict else content
    print(content[:100])
    with  memfs.open(filename, 'w') as f:
        f.write(content)
    logger.info(f"Successfully cached {filename} to memory, length={len(content)}")
    entry_points.append(f"mem://{filename}")

2025-03-02 22:59:44,316 - main - PID:55007 - INFO - sch, EX-101.SCH, aapl-20200926.xsd
2025-03-02 22:59:44,317 - main - PID:55007 - INFO - Successfully cached aapl-20200926.xsd to memory, length=64224
2025-03-02 22:59:44,317 - main - PID:55007 - INFO - cal, EX-101.CAL, aapl-20200926_cal.xml
2025-03-02 22:59:44,318 - main - PID:55007 - INFO - Successfully cached aapl-20200926_cal.xml to memory, length=158906
2025-03-02 22:59:44,318 - main - PID:55007 - INFO - def, EX-101.DEF, aapl-20200926_def.xml
2025-03-02 22:59:44,319 - main - PID:55007 - INFO - Successfully cached aapl-20200926_def.xml to memory, length=347474
2025-03-02 22:59:44,319 - main - PID:55007 - INFO - lab, EX-101.LAB, aapl-20200926_lab.xml
2025-03-02 22:59:44,320 - main - PID:55007 - INFO - Successfully cached aapl-20200926_lab.xml to memory, length=897633
2025-03-02 22:59:44,320 - main - PID:55007 - INFO - pre, EX-101.PRE, aapl-20200926_pre.xml
2025-03-02 22:59:44,321 - main - PID:55007 - INFO - Successfully cached aapl-2

<?xml version="1.0" encoding="UTF-8"?><!--XBRL Document Created with Wdesk from Workiva--><!--Copyri
<?xml version="1.0" encoding="UTF-8"?><!--XBRL Document Created with Wdesk from Workiva--><!--Copyri
<?xml version="1.0" encoding="UTF-8"?><!--XBRL Document Created with Wdesk from Workiva--><!--Copyri
<?xml version="1.0" encoding="UTF-8"?><!--XBRL Document Created with Wdesk from Workiva--><!--Copyri
<?xml version="1.0" encoding="UTF-8"?><!--XBRL Document Created with Wdesk from Workiva--><!--Copyri
<?xml version="1.0" encoding="utf-8"?><xbrl  xml:lang="en-US"  xmlns="http://www.xbrl.org/2003/insta


In [6]:
data_pool = Pool(max_error=2, esef_filing_root="mem://", memfs=memfs); #self = data_pool

this_tax = Taxonomy(entry_points=entry_points,
                           container_pool = data_pool, 
                           esef_filing_root="mem://",
                           memfs=memfs)  #

data_pool.current_taxonomy = this_tax

print(this_tax)

2025-03-02 22:59:44,327 - main.openesf.base.pool - PID:55007 - INFO - 

Initializing Pool with cache_folder=None, output_folder=None
2025-03-02 22:59:44,328 - main.openesf.base.pool - PID:55007 - INFO - Using repository cache folder: /Users/mbp16/Dropbox/sciebo/WebScraping+ESEF_Paper/Research/code_fse/openesef/openesef/xbrl_schema
2025-03-02 22:59:44,328 - main.openesf.taxonomy - PID:55007 - DEBUG - Taxonomy.load(): Loading mem://aapl-20200926.xsd with self.esef_filing_root=mem://
2025-03-02 22:59:44,329 - main.openesf.taxonomy - PID:55007 - DEBUG - Calling self.pool.add_reference(...) with href = mem://aapl-20200926.xsd, base = "", esef_filing_root = mem://
2025-03-02 22:59:44,329 - main.openesf.taxonomy - PID:55007 - DEBUG - Loading mem://aapl-20200926.xsd from file/URL
2025-03-02 22:59:44,329 - main.openesf.base.pool - PID:55007 - DEBUG - ==  Calling _resolve_url(...): href: mem://aapl-20200926.xsd, base: , esef_filing_root: mem://
2025-03-02 22:59:44,330 - main.openesf.base.pool - 

Schemas: 15
Linkbases: 15
Role Types: 775
Arcrole Types: 0
Concepts: 18391
Item Types: 52
Tuple Types: 0
Simple Types: 0
Labels: 1527
References: 0
Hierarchies: 239
Dimensional Relationship Sets: 239
Dimensions: 295
Hypercubes: 366
Enumerations: 0
Enumerations Sets: 0
Table Groups: 0
Tables: 0
Parameters: 0
Assertion Sets: 0
Value Assertions: 0
Existence Assertions: 0
Consistency Assertions: 0


In [7]:
if filing.xbrl_files.get("xml"):
    xml_filename = filing.xbrl_files.get("xml")
    instance_str = filing.documents[xml_filename].doc_text.data #.get("<XML>", "")
    instance_str = list(instance_str.values())[0] if type(instance_str) == dict else instance_str
    #instance_str = clean_doc(instance_str)
    instance_byte = instance_str.encode('utf-8')
    instance_io = BytesIO(instance_byte)
    instance_tree = lxml_etree.parse(instance_io)
    root = instance_tree.getroot()
    data_pool.cache_from_string(location=xml_filename, content=instance_str, memfs=memfs)
    xid = Instance(container_pool=data_pool, root=root, memfs=memfs)

print(xid)

2025-03-02 22:59:49,565 - main.openesf.base.resolver - PID:55007 - DEBUG - Successfully cached aapl-20200926_htm.xml to memory with href=mem://aapl-20200926_htm.xml


Namespaces: 10
Schema references: 1
Linkbase references: 0
Contexts: 318
Units: 9
Facts: 1388
Footnotes: 0
Filing Indicators: 0


In [8]:
for i, (key, value) in enumerate(xid.dei.items()):
    print(f"{i}: {key}: {value}")


0: AmendmentFlag: false
1: DocumentFiscalYearFocus: 2020
2: DocumentFiscalPeriodFocus: FY
3: EntityCentralIndexKey: 0000320193
4: CurrentFiscalYearEndDate: --09-26
5: DocumentType: 10-K
6: DocumentAnnualReport: true
7: DocumentPeriodEndDate: 2020-09-26
8: DocumentTransitionReport: false
9: EntityFileNumber: 001-36743
10: EntityRegistrantName: Apple Inc.
11: EntityIncorporationStateCountryCode: CA
12: EntityTaxIdentificationNumber: 94-2404110
13: EntityAddressAddressLine1: One Apple Park Way
14: EntityAddressCityOrTown: Cupertino
15: EntityAddressStateOrProvince: CA
16: EntityAddressPostalZipCode: 95014
17: CityAreaCode: 408
18: LocalPhoneNumber: 996-1010
19: TradingSymbol: AAPL
20: EntityWellKnownSeasonedIssuer: Yes
21: EntityVoluntaryFilers: No
22: EntityCurrentReportingStatus: Yes
23: EntityInteractiveDataCurrent: Yes
24: EntityFilerCategory: Large Accelerated Filer
25: EntitySmallBusiness: false
26: EntityEmergingGrowthCompany: false
27: IcfrAuditorAttestationFlag: true
28: EntitySh

In [9]:
reporting_contexts = xid.identify_reporting_contexts()
df_contexts = pd.DataFrame.from_dict(reporting_contexts, orient="index")
print(df_contexts)


                                                                   context_id  \
CurrentInstanceDateContext        i747bec89b4e84f74ae3445db3509f609_I20200926   
CurrentPeriodContext        i223bd574caab4f739f73936be6065c72_D20190929-20...   
PriorInstanceDateContext          ic3ea678a3e394e00880d68882e8bdc02_I20200327   
PriorPeriodContext          i5085fb79a9a14a9aae9b909beb32bce2_D20180930-20...   

                                    period_string     instant  start_date  \
CurrentInstanceDateContext             2020-09-26  2020-09-26         NaN   
CurrentPeriodContext        2019-09-29/2020-09-26         NaN  2019-09-29   
PriorInstanceDateContext               2020-03-27  2020-03-27         NaN   
PriorPeriodContext          2018-09-30/2019-09-28         NaN  2018-09-30   

                              end_date  
CurrentInstanceDateContext         NaN  
CurrentPeriodContext        2020-09-26  
PriorInstanceDateContext           NaN  
PriorPeriodContext          2019-09-28  
