# Read XBRL from Taxonomy

タクソノミ定義の情報を参照して、XBRLから情報抽出を行う。


In [28]:
import os
from pathlib import Path
import edinet

# Target document: Suntory Holdings Limited / H31.03.26 12:50 (Heisei 31 = 2019)
DOC_ID = "S100FGSC"

# Root folder for every data file used below, located under the current working directory
DATA_ROOT = Path.cwd() / "data"

## Download XBRL

In [29]:
from edinet.xbrl_file import XBRLDir


# Fetch the document identified by DOC_ID into the "raw" data folder,
# then wrap the returned path in an XBRLDir for reading.
raw_dir = DATA_ROOT / "raw"
xbrl_path = edinet.api.document.get_xbrl(DOC_ID, save_dir=raw_dir, expand_level="dir")

xbrl_dir = XBRLDir(xbrl_path)



## Read Document Schema

Get namespaces

In [30]:
from collections import namedtuple

# Record for one namespace declared on the document schema.
#   name      - prefix taken from the ``xmlns:<name>`` attribute
#   namespace - namespace URI bound to that prefix
#   loc       - schemaLocation of the xsd:import with the same namespace,
#               or "" when the schema does not import it
# NOTE(review): Namespace was referenced here without being defined or imported
# anywhere in the notebook (NameError on a fresh kernel). The field names are
# chosen to mirror the local variables used below - confirm against any
# downstream code that reads these records.
Namespace = namedtuple("Namespace", ["name", "namespace", "loc"])

xsd = xbrl_dir.xsd

# Get import xsd files: namespace URI -> schemaLocation
# NOTE(review): this iterates the result of find(); if find() returns only a
# single element in the edinet API, find_all() may be what is intended - confirm.
imports = {}
for i in xsd.find("xsd:import"):
    imports[i["namespace"]] = i["schemaLocation"]

# Get list of namespace: every ``xmlns:<prefix>`` attribute on the schema root
schema = xsd.find("xsd:schema")
namespace_prefix = "xmlns:"
namespaces = [
    Namespace(a[len(namespace_prefix):], schema[a], imports.get(schema[a], ""))
    for a in schema.attrs
    if a.startswith(namespace_prefix)
]


Get taxonomy file

In [32]:
# Taxonomy archive URL per year, hosted on www.fsa.go.jp.
# Keys are listed in ascending year order.
taxonomies = {
    2013: "https://www.fsa.go.jp/search/20130821/editaxonomy2013New.zip",
    2014: "https://www.fsa.go.jp/search/20140310/1c.zip",
    2015: "https://www.fsa.go.jp/search/20150310/1c.zip",
    2016: "https://www.fsa.go.jp/search/20160314/1c.zip",
    2017: "https://www.fsa.go.jp/search/20170228/1c.zip",
    2018: "https://www.fsa.go.jp/search/20180228/1c_Taxonomy.zip",
    2019: "https://www.fsa.go.jp/search/20190228/1c_Taxonomy.zip",
}

Confirm fiscal year and target taxonomy

In [33]:
from datetime import datetime


# Read the fiscal year end from the DEI element and parse it as YYYY-MM-DD.
fiscal_year_end = xbrl_dir.xbrl.find("jpdei_cor:CurrentFiscalYearEndDateDEI").text
fiscal_year_end = datetime.strptime(fiscal_year_end, "%Y-%m-%d")

# A taxonomy year applies when the fiscal year ends strictly after March 31 of
# that year; the latest applicable year wins. Years are sorted explicitly so
# the result does not depend on the order in which `taxonomies` was written.
applicable_years = [
    year for year in sorted(taxonomies)
    if fiscal_year_end > datetime(year, 3, 31)
]

if not applicable_years:
    # Stop here with a clear message instead of passing a -1 sentinel on to the
    # download step, where it would only surface as a KeyError.
    raise ValueError(
        f"No taxonomy is registered for fiscal year end {fiscal_year_end:%Y-%m-%d}; "
        f"earliest supported taxonomy year is {min(taxonomies)}."
    )

taxonomy_year = applicable_years[-1]

print(taxonomy_year)

2018


Download taxonomy

In [47]:
from zipfile import ZipFile
import requests


external_dir = DATA_ROOT.joinpath("external")
expand_dir = external_dir.joinpath("taxonomy").joinpath(str(taxonomy_year))
taxonomy_file = external_dir.joinpath(f"{taxonomy_year}_taxonomy.zip")

# Download only when no extracted taxonomy is available. An existing but empty
# directory counts as missing: that is what an interrupted earlier run leaves
# behind, and skipping the download then would leave the taxonomy absent forever.
download = not expand_dir.exists() or not any(expand_dir.iterdir())

if download:
    # Also creates external_dir, which is a parent of expand_dir.
    expand_dir.mkdir(parents=True, exist_ok=True)

    try:
        # Download
        # timeout bounds each connect/read wait so a stalled server cannot hang the cell.
        with requests.get(taxonomies[taxonomy_year], stream=True, timeout=60) as r:
            # Fail on an HTTP error instead of saving the error page as the zip.
            r.raise_for_status()
            with taxonomy_file.open(mode="wb") as zip_out:
                for chunk in r.iter_content(1024):
                    zip_out.write(chunk)

        # Extract
        with ZipFile(taxonomy_file, "r") as archive:
            for member in archive.infolist():
                # Avoid Japanese path: keep only what lies below the
                # "<dir>/<dir>/taxonomy/" prefix of each entry.
                parts = member.filename.split("/")
                # Skip directory entries and anything not strictly below the
                # "taxonomy" component (too shallow, or a different subtree).
                if member.is_dir() or len(parts) < 4 or parts[2] != "taxonomy":
                    continue
                relative_parts = parts[3:]
                # Never let an archive entry climb out of expand_dir.
                if ".." in relative_parts:
                    continue
                _to = expand_dir.joinpath(*relative_parts)
                _to.parent.mkdir(parents=True, exist_ok=True)
                _to.write_bytes(archive.read(member))
    finally:
        # The zip is only a temporary artifact; remove it even when a step failed.
        if taxonomy_file.exists():
            taxonomy_file.unlink()

expand_dir

WindowsPath('c:/Users/ico/Documents/works/xbrl_read_tutorial/data/external/taxonomy/2018')

Read from taxonomy

[]