# XBRL data extraction sandbox

Import logging, file-path helpers, the XBRL parser classes, pandas, and json.

In [1]:
from os import listdir, path
from os.path import isfile, join
import logging
from xbrl.cache import HttpCache
from xbrl.instance import XbrlParser, XbrlInstance
import json
import pandas as pd

Import the account data extraction functions

In [2]:
from digiaccounts_data import get_financial_table, get_startend_period, get_company_address, get_company_registration


List the available accounts files and create a parsed XBRL instance of the chosen file for data extraction

In [3]:
# Directory holding the accounts filings, relative to the notebook.
account_root = 'accounts_data'
# Company number used to pick filings by filename; '07676340' is the other
# sample company that was previously tried in this cell.
company_number = '11505896'

# os.listdir returns entries in arbitrary order, so sort the listing: the
# parser-setup cell picks a filing by position (active[2]) and needs a
# stable, reproducible order.
all_accounts = sorted(f for f in listdir(account_root) if isfile(join(account_root, f)))
active = [a for a in all_accounts if company_number in a]
active

['11505896_aa_2021-05-25.xhtml',
 '11505896_aa_2022-01-07.xhtml',
 '11505896_aa_2022_01_07.xhtml']

In [4]:
# Log at WARNING level and above.
logging.basicConfig(level=logging.WARNING)

# HttpCache is given './cache' (presumably its on-disk cache directory) and
# the request headers are set on it before it is handed to the parser.
request_headers = {'From': 'ahoward@companieshouse.gov.uk', 'User-Agent': 'py-xbrl/2.1.0'}
cache: HttpCache = HttpCache('./cache')
cache.set_headers(request_headers)
parser = XbrlParser(cache)

# Path of the filing to parse: the third entry of `active`.
schema = join(account_root, active[2])

inst: XbrlInstance = parser.parse_instance(schema)



Print each fact held within the XBRL file for reference

In [5]:
# Dump every fact: concept name, value and (when the context has one) its
# instant date, each followed by a blank line.
for fact in inst.facts:
    concept_name = fact.concept.name

    # Contexts without an instant date: print name and value only.
    if not hasattr(fact.context, 'instant_date'):
        print(concept_name, fact.value, '\n')
        continue

    # Any fact exposing a `unit` attribute is shown with a '£' prefix.
    display_value = f'£{fact.value}' if hasattr(fact, 'unit') else fact.value
    print(concept_name, display_value, f'date={fact.context.instant_date}\n')

FixedAssets £12964.0 date=2021-03-31

FixedAssets £17285.0 date=2020-03-31

CurrentAssets £39112.0 date=2021-03-31

CurrentAssets £21842.0 date=2020-03-31

Creditors £19770.0 date=2021-03-31

Creditors £19036.0 date=2020-03-31

NetCurrentAssetsLiabilities £19342.0 date=2021-03-31

NetCurrentAssetsLiabilities £2806.0 date=2020-03-31

TotalAssetsLessCurrentLiabilities £32306.0 date=2021-03-31

TotalAssetsLessCurrentLiabilities £20091.0 date=2020-03-31

Creditors £25000.0 date=2021-03-31

Creditors £0.0 date=2020-03-31

AccruedLiabilitiesNotExpressedWithinCreditorsSubtotal £0.0 date=2021-03-31

AccruedLiabilitiesNotExpressedWithinCreditorsSubtotal £1165.0 date=2020-03-31

NetAssetsLiabilities £7306.0 date=2021-03-31

NetAssetsLiabilities £18926.0 date=2020-03-31

Equity £7306.0 date=2021-03-31

Equity £18926.0 date=2020-03-31

AverageNumberEmployeesDuringPeriod 4.0 

AverageNumberEmployeesDuringPeriod 4.0 

LegalFormEntity  

StartDateForPeriodCoveredByReport 2020-04-01 date=2020-03-31

E

## Deploy accounts functions
Pass the instance to each of the accounts functions to extract the important account data

In [5]:
# Run each extraction helper imported from digiaccounts_data on the parsed instance.
# NOTE(review): the saved output of this cell is
# KeyError: 'MaturitiesOrExpirationPeriodsDimension' — presumably raised by one
# of these helpers for the selected filing; confirm which one and re-run.
pl = get_financial_table(inst)  # displayed later as a table of facts by date
start, end = get_startend_period(inst)  # unpacked as (start, end)
reg_number = get_company_registration(inst)
address= get_company_address(inst)  # displayed later as a one-row address table


KeyError: 'MaturitiesOrExpirationPeriodsDimension'

In [33]:
# Print the JSON form of every 'Creditors' fact that has both an instant
# date on its context and a unit, to inspect its dimensions.
for fact in inst.facts:
    has_date_and_unit = hasattr(fact.context, 'instant_date') and hasattr(fact, 'unit')
    if has_date_and_unit and fact.concept.name == 'Creditors':
        print(fact.json())

{'dimensions': {'unit': 'iso4217:GBP', 'concept': 'Creditors', 'entity': '11505896', 'period': '2019-09-30', 'FinancialInstrumentCurrentNon-currentDimension': 'CurrentFinancialInstruments'}, 'decimals': 0, 'value': 590358.0}
{'dimensions': {'unit': 'iso4217:GBP', 'concept': 'Creditors', 'entity': '11505896', 'period': '2019-09-30', 'FinancialInstrumentCurrentNon-currentDimension': 'Non-currentFinancialInstruments'}, 'decimals': 0, 'value': 750000.0}


In [6]:
# Display the financial table returned by get_financial_table.
pl

Unnamed: 0,Fact,2020-03-31,2021-03-31
0,FixedAssets,17285.0,12964.0
1,CurrentAssets,21842.0,39112.0
2,CreditorsWithinOneYear,19036.0,19770.0
3,NetCurrentAssetsLiabilities,2806.0,19342.0
4,TotalAssetsLessCurrentLiabilities,20091.0,32306.0
5,CreditorsAfterOneYear,0.0,25000.0
6,AccruedLiabilitiesNotExpressedWithinCreditorsS...,1165.0,0.0
7,NetAssetsLiabilities,18926.0,7306.0
8,Equity,18926.0,7306.0


In [7]:
# Show both reporting-period boundaries, one per line.
print(
    f'Period Start Date: {start}',
    f'Period End Date: {end}',
    sep='\n',
)


Period Start Date: 2020-03-31
Period End Date: 2021-03-31


In [8]:
# NOTE(review): the saved output shows 07676340, but the file-selection cell
# filters on '11505896' — the output looks stale; re-run to confirm.
print(f'Company Registration Number: {reg_number}')

Company Registration Number: 07676340


In [9]:
# Display the address table returned by get_company_address.
address

Unnamed: 0,AddressLine1,PrincipalLocation-CityOrTown,PostalCodeZip
0,7 CAER STREET,SWANSEA,SA1 3PP


In [10]:
# Round-trip through to_json/json.loads to get the address frame as a plain
# dict; with orient='split' and index=False only 'columns' and 'data' remain.
json.loads(address.to_json(index=False, orient='split'))


{'columns': ['AddressLine1', 'PrincipalLocation-CityOrTown', 'PostalCodeZip'],
 'data': [['7 CAER STREET', 'SWANSEA', 'SA1 3PP']]}

In [11]:
# Same round-trip for the financial table: orient='split' with index=False
# yields only 'columns' and 'data'; date_format='iso' asks pandas to write
# any datetime values as ISO 8601 strings.
json.loads(pl.to_json(date_format='iso', index=False, orient='split'))


{'columns': ['Fact', '2020-03-31', '2021-03-31'],
 'data': [['FixedAssets', 17285.0, 12964.0],
  ['CurrentAssets', 21842.0, 39112.0],
  ['CreditorsWithinOneYear', 19036.0, 19770.0],
  ['NetCurrentAssetsLiabilities', 2806.0, 19342.0],
  ['TotalAssetsLessCurrentLiabilities', 20091.0, 32306.0],
  ['CreditorsAfterOneYear', 0.0, 25000.0],
  ['AccruedLiabilitiesNotExpressedWithinCreditorsSubtotal', 1165.0, 0.0],
  ['NetAssetsLiabilities', 18926.0, 7306.0],
  ['Equity', 18926.0, 7306.0]]}

In [14]:
# Convert both frames to plain dicts ('columns' + 'data') via a JSON round-trip.
address_payload = json.loads(address.to_json(index=False, orient='split'))
financial_payload = json.loads(pl.to_json(date_format='iso', index=False, orient='split'))

# Assemble the combined record for this filing.
x = dict(
    registration=reg_number,
    reporting_period=dict(start=start, end=end),
    address=address_payload,
    financial_data=financial_payload,
)

In [21]:
# Serialise the combined record with 4-space indentation and write it out.
# NOTE(review): the filename is spelled 'accout_output.json' — presumably a
# typo for 'account'; kept as-is in case other tooling reads this path.
serialised = json.dumps(x, indent=4)
with open('accout_output.json', 'w') as out_file:
    out_file.write(serialised)