# XBRL data extraction sandbox

Import logging, filepath functions, the XBRL parser functions, pandas, json

In [1]:
from os import listdir, path
from os.path import isfile, join
import logging
from xbrl.cache import HttpCache
from xbrl.instance import XbrlParser, XbrlInstance
import json
import pandas as pd

Import account data extraction funtions

In [2]:
from digiaccounts.digiaccounts_data import (
    get_financial_table,
    get_startend_period,
    get_company_address,
    get_company_registration,
    get_accounting_software,
    get_share_info,
    get_director_names
)





Define accounts files and create instance of chosen file for data extractions

In [3]:
account_root = 'accounts_data'

all_accounts = [f for f in listdir(account_root) if isfile(join(account_root, f))]
all_accounts
active = [a for a in all_accounts if '11505896' in a]  # Estates
# active = [a for a in all_accounts if '07676340' in a]  # Abbey Roofing
# active = [a for a in all_accounts if '11790215' in a]  # Dormant Accounts
active

['11505896_aa_2020-03-20.xhtml',
 '11505896_aa_2021-05-25.xhtml',
 '11505896_aa_2022-01-07.xhtml']

In [4]:
logging.basicConfig(level=logging.WARNING)

cache: HttpCache = HttpCache('./cache')
cache.set_headers({'From': 'ahoward@companieshouse.gov.uk', 'User-Agent': 'py-xbrl/2.1.0'})
parser = XbrlParser(cache)

schema = path.join(account_root, active[0])

inst: XbrlInstance = parser.parse_instance(schema)

In [5]:
json.loads(inst.json())['facts']

{'f0': {'dimensions': {'unit': 'xbrli:pure',
   'concept': 'ParValueShare',
   'entity': '11505896',
   'period': '2018-08-08/2019-09-30',
   'EntityShareClassesDimension': 'OrdinaryShareClass1'},
  'decimals': 0,
  'value': 1.0},
 'f1': {'dimensions': {'unit': 'iso4217:GBP',
   'concept': 'NominalValueSharesIssuedSpecificShareIssue',
   'entity': '11505896',
   'period': '2018-08-08/2019-09-30',
   'EntityShareClassesDimension': 'OrdinaryShareClass1'},
  'decimals': 0,
  'value': 1.0},
 'f2': {'dimensions': {'unit': 'iso4217:GBP',
   'concept': 'PropertyPlantEquipment',
   'entity': '11505896',
   'period': '2019-09-30'},
  'decimals': 0,
  'value': 80316.0},
 'f3': {'dimensions': {'unit': 'iso4217:GBP',
   'concept': 'InvestmentProperty',
   'entity': '11505896',
   'period': '2019-09-30'},
  'decimals': 0,
  'value': 1216063.0},
 'f4': {'dimensions': {'unit': 'iso4217:GBP',
   'concept': 'FixedAssets',
   'entity': '11505896',
   'period': '2019-09-30'},
  'decimals': 0,
  'value': 

Print each fact held within the XBRL file for reference

In [6]:
for fact in inst.facts:
    if hasattr(fact.context, 'instant_date'):
        if hasattr(fact, 'unit'):
            print(fact.concept.name, f'£{fact.value}', f'date={fact.context.instant_date}\n')
            #print(fact.unit)
        else:
            print(fact.concept.name, fact.value, f'date={fact.context.instant_date}\n')
    else:
        print(fact.concept.name, fact.value, '\n')
    # if fact.concept.name == 'Creditors':
    #     print(dir(fact.context.segments))
    #     break

ParValueShare 1.0 

NominalValueSharesIssuedSpecificShareIssue 1.0 

PropertyPlantEquipment £80316.0 date=2019-09-30

InvestmentProperty £1216063.0 date=2019-09-30

FixedAssets £1296379.0 date=2019-09-30

Debtors £8154.0 date=2019-09-30

CashBankOnHand £37516.0 date=2019-09-30

CurrentAssets £45670.0 date=2019-09-30

Creditors £590358.0 date=2019-09-30

NetCurrentAssetsLiabilities £-544688.0 date=2019-09-30

TotalAssetsLessCurrentLiabilities £751691.0 date=2019-09-30

Creditors £750000.0 date=2019-09-30

ProvisionsForLiabilitiesBalanceSheetSubtotal £303.0 date=2019-09-30

NetAssetsLiabilities £1388.0 date=2019-09-30

Equity £100.0 date=2019-09-30

Equity £1288.0 date=2019-09-30

Equity £1388.0 date=2019-09-30

AverageNumberEmployeesDuringPeriod 2.0 

TotalAdditionsIncludingFromBusinessCombinationsPropertyPlantEquipment 83086.0 

PropertyPlantEquipmentGrossCost £83086.0 date=2019-09-30

IncreaseFromDepreciationChargeForYearPropertyPlantEquipment 2770.0 

AccumulatedDepreciationImpairmen

## Deploy accounts functions
Parse the instance to each of the accounts functions to extract the important account data

In [7]:
pl = get_financial_table(inst)
start, end = get_startend_period(inst)
reg_number = get_company_registration(inst)
address= get_company_address(inst)
software = get_accounting_software(inst)
shares = get_share_info(inst)
directors = get_director_names(inst)

In [8]:
directors

{'Director2': 'N Baldwin', 'Director1': 'S D Fidoe BSc.Hons'}

In [8]:
software

'IRIS Accounts Production'

In [9]:
for s in shares:
    print(s)

['ParValueShare', None, 1.0, None]
['NominalValueSharesIssuedSpecificShareIssue', '£', 1.0, None]
['EquityShareCapital', '£', 100.0, '2019-09-30']
['NumberSharesIssuedFullyPaid', None, 100.0, '2019-09-30']
['NumberSharesIssuedSpecificShareIssue', None, 100.0, None]
['DescriptionShareType', None, 'Ordinary', None]
['DescriptionReasonsForSpecificShareIssue', None, 'cash at par', None]


In [10]:
pl

Date,Fact,2019-09-30
0,AccumulatedDepreciationImpairmentPropertyPlant...,2770.0
1,BankBorrowings,750000.0
2,BankBorrowingsOverdrafts,750000.0
3,CashBankOnHand,37516.0
4,CreditorsDueAfterOneYear,750000.0
5,CreditorsDueWithinOneYear,590358.0
6,CurrentAssets,45670.0
7,Debtors,8154.0
8,Equity,1388.0
9,EquityRetainedEarningsAccumulatedLosses,1288.0


In [11]:
print(f'Period Start Date: {start}') 
print(f'Period End Date: {end}')


Period Start Date: 2019-09-30
Period End Date: 2019-09-30


In [12]:
print(f'Company Registration Number: {reg_number}')

Company Registration Number: 11505896


In [13]:
address

Unnamed: 0,AddressLine1,AddressLine2,AddressLine3,PrincipalLocation-CityOrTown,PostalCodeZip
0,Mews Lodge,The Eades,Monsell Lane,Upton-upon-Severn,WR8 0QN


In [58]:
# json.loads(address.to_json(index=False, orient='split'))


In [59]:
# json.loads(pl.to_json(date_format='iso', index=False, orient='split'))


In [60]:
# x = {
#     'registration': reg_number,
#     'reporting_period': {'start': start, 'end': end},
#     'address': json.loads(address.to_json(index=False, orient='split')),
#     'financial_data': json.loads(pl.to_json(date_format='iso', index=False, orient='split'))
# }

In [61]:
# with open('accout_output.json', 'w') as f:
#     json.dump(x, f, indent=4)

In [10]:
schema = path.join('digiaccounts/tests/data/example_happy.xhtml')

inst: XbrlInstance = parser.parse_instance(schema)

In [14]:
for fact in inst.facts:
    print(fact.concept.name, fact.value, fact.xml_id)
    print(hasattr(fact.context, 'instant_date'))

CashBankOnHand 12345000000.0 coh1
True
CashBankOnHand 12345000000.0 coh2
True
InvestmentProperty 9876000000.0 inv1
True
InvestmentProperty 9876000000.0 inv2
True
NameEntityOfficer J SMITH
         dir1
False
NameEntityOfficer F BLOGS
         dir2
False
UKCompaniesHouseRegisteredNumber 
            0000000000 reg
False


In [13]:
fact.instant_date

AttributeError: 'TextFact' object has no attribute 'instant_date'