In [2]:
# import our libraries
import requests
import pandas as pd
from bs4 import BeautifulSoup
import sys
import time


In [34]:
# Code from code project

# Access page
cik = '0001403161'
type = '10-K'
dateb = '20200101'

# Obtain HTML for search page
base_url = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={}&type={}&dateb={}"
edgar_resp = requests.get(base_url.format(cik, type, dateb))
edgar_str = edgar_resp.text

# Wait for 10 seconds
time.sleep(10)

# Find the document link
doc_link = ''
soup = BeautifulSoup(edgar_str, 'html.parser')
table_tag = soup.find('table', class_='tableFile2')
rows = table_tag.find_all('tr')
for row in rows:
    cells = row.find_all('td')
    if len(cells) > 3:
        if '2018' in cells[3].text:
            doc_link = 'https://www.sec.gov' + cells[1].a['href']

# Exit if document link couldn't be found
if doc_link == '':
    print("Couldn't find the document link")
    sys.exit()

# Obtain HTML for document page
doc_resp = requests.get(doc_link)
doc_str = doc_resp.text

# Wait for 10 seconds
time.sleep(10)

# Find the XBRL link
xbrl_link = ''
soup = BeautifulSoup(doc_str, 'html.parser')
table_tag = soup.find('table', class_='tableFile', summary='Data Files')
rows = table_tag.find_all('tr')
for row in rows:
    cells = row.find_all('td')
    if len(cells) > 3:
        if 'INS' in cells[3].text:
            xbrl_link = 'https://www.sec.gov' + cells[2].a['href']
            

# Exit if document link couldn't be found
if xbrl_link == '':
    print("Couldn't find the document link")
    sys.exit()
    

# Obtain XBRL text from document
xbrl_resp = requests.get(xbrl_link)
xbrl_str = xbrl_resp.text

# Find and print value and date for the specified tag from the XBRL accounts
soup = BeautifulSoup(xbrl_str, 'lxml')
tag_list = soup.find_all()

#This section of code creates a context table. 
#The context table is a dictionary of context names keys that reference dictionary values 
#containing date information for each context. For contexts with datetype of 'period' the table 
#contains the start and end date. For contexts with datetype of 'instant' the context
#contains the instant date of the context. All entries include a date and dateType value.
#For contexts with datetype of period, the date is equal to the enddate of the context.

contexts = {}

for tag in tag_list:
	if tag.name == 'xbrli:context':
		
		#This section of code finds the start date of the context if it exists.
		start_date_tag = tag.find(name = 'xbrli:startdate')
		if start_date_tag == None:
			start_date = None
		else:
			start_date = start_date_tag.text
		
		#This section of code finds the end date of the context if it exists.
		end_date_tag = tag.find(name = 'xbrli:enddate')
		if end_date_tag == None:
			end_date = None
		else:
			end_date = end_date_tag.text
			date = end_date_tag.text
			datetype = 'period'
			
		#This section of code finds the instant date of the context if it exists.
		instant_date_tag = tag.find(name = 'xbrli:instant')
		if instant_date_tag != None:
			date = instant_date_tag.text
			datetype = 'instant'
		
		#build a dictionary of date information within a dictionary of context titles
		dtinfo = {'date' : date, 'year' : date[0:4], 'datetype' : datetype, 'startdate' : start_date, 'enddate' : end_date}
		contexts[tag.attrs['id']] = dtinfo
		 
# Find and print stockholder's equity
for tag in tag_list:
    if tag.name == 'us-gaap:stockholdersequity':
        year = contexts[tag.attrs['contextref']]['year']
        print(year + " Stockholder's equity: " + tag.text)

print('I am finished. If nothing else is printed specified tag is not in the accounts.')

# Find and print stockholder's equity
for tag in tag_list:
    if tag.name == 'entitycurrentreportingstatus':
        year = contexts[tag.attrs['contextref']]['year']
        print(year + " Entity current reporting status: " + tag.text)

print('I am finished. If nothing else is printed specified tag is not in the accounts.')

# Find and print stockholder's equity
for tag in tag_list:
    if tag.name == 'us-gaap:assets':
        year = contexts[tag.attrs['contextref']]['year']
        print(year + " Assets: " + tag.text)

print('I am finished. If nothing else is printed specified tag is not in the accounts.')

# Find and print stockholder's equity
for tag in tag_list:
    if tag.name == 'us-gaap:assetscurrent':
        year = contexts[tag.attrs['contextref']]['year']
        print(year + " Assets Current: " + tag.text)

print('I am finished. If nothing else is printed specified tag is not in the accounts.')


I am finished. If nothing else is printed specified tag is not in the accounts.
I am finished. If nothing else is printed specified tag is not in the accounts.
2017Assets: 67977000000
2018Assets: 69225000000
I am finished. If nothing else is printed specified tag is not in the accounts.
2017Assets Current: 19023000000
2018Assets Current: 18216000000
I am finished. If nothing else is printed specified tag is not in the accounts.


In [19]:
doc_link

'https://www.sec.gov/Archives/edgar/data/1403161/000140316119000050/0001403161-19-000050-index.htm'

In [14]:
xbrl_link

'https://www.sec.gov/Archives/edgar/data/1403161/000140316118000055/v-20180930.xml'

#### Link for 2019 accounts
Below link was found manually
https://www.sec.gov/Archives/edgar/data/1403161/000140316119000050/v093019form10k_htm.xml

currently code can't find links to 2019 accounts as edgar does not have accounts ending in v-20190101.xml

But we still get to the main page with all avaliable filings as "doc_link"
https://www.sec.gov/Archives/edgar/data/1403161/000140316119000050/0001403161-19-000050-index.htm

#### List of xbrl tags avaliable in the accounts

Documentation for xbrl tags on SEC website is limited and confusing. 
It is easier to identify tags that are useful by reviewing avaliable tags from the scraped accounts.  

In [25]:
# creating a list of tag names
list_of_tags = []

for tag in tag_list:
    list_of_tags.append(tag.name)
    
    

In [26]:
# making list unique
# converting list to set
list_of_tags
clean_tags = set(list_of_tags)
clean_tags

{'body',
 'dei:amendmentflag',
 'dei:currentfiscalyearenddate',
 'dei:documentfiscalperiodfocus',
 'dei:documentfiscalyearfocus',
 'dei:documentperiodenddate',
 'dei:documenttype',
 'dei:entitycentralindexkey',
 'dei:entitycommonstocksharesoutstanding',
 'dei:entitycurrentreportingstatus',
 'dei:entityemerginggrowthcompany',
 'dei:entityfilercategory',
 'dei:entitypublicfloat',
 'dei:entityregistrantname',
 'dei:entityshellcompany',
 'dei:entitysmallbusiness',
 'dei:entityvoluntaryfilers',
 'dei:entitywellknownseasonedissuer',
 'html',
 'link:footnote',
 'link:footnotearc',
 'link:footnotelink',
 'link:loc',
 'link:schemaref',
 'us-gaap:accountspayablecurrent',
 'us-gaap:accountsreceivablenetcurrent',
 'us-gaap:accruedincometaxescurrent',
 'us-gaap:accruedincometaxesnoncurrent',
 'us-gaap:accruedliabilitiescurrent',
 'us-gaap:accumulateddepreciationdepletionandamortizationpropertyplantandequipment',
 'us-gaap:accumulatedothercomprehensiveincomelossavailableforsalesecuritiesadjustmentne