In [1]:
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import time
from financial_reports_collector import filings_collector
import datetime as dt
import requests
import pprint as pp
import xml.etree.ElementTree as ET

In [2]:
# Access page
ticker = input('ticker?').lower()
file_type = input('What file?(10-K,10-Q)').upper()

# Obtain HTML for search page
establishing_url = f'https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={ticker}&type={file_type}&dateb=&owner=exclude&start=0&count=100'
edgar_resp = requests.get(establishing_url)
edgar_str = edgar_resp.text

ticker?gme
What file?(10-K,10-Q)10-k


In [3]:
# Find the document link and create df
reports_list = []
doc_link = ''
soup = BeautifulSoup(edgar_str, 'html.parser')
table_tag = soup.find('table', class_='tableFile2')
rows = table_tag.find_all('tr')
for row in rows:
    cells = row.find_all('td')
    if len(cells) > 3:
        filing = cells[0].text
        doc_link = 'https://www.sec.gov' + cells[1].a['href']
        filing_date = dt.date.fromisoformat(cells[3].text)
        entry = {'filings' : filing, 'doc_link' : doc_link, 'date' : filing_date}
        reports_list.append(entry)

In [4]:
#  Find the XBRL link
for report in reports_list:
    doc_link = report['doc_link']
    doc_resp = requests.get(doc_link)
    doc_str = doc_resp.text
    soup = BeautifulSoup(doc_str, 'html.parser')
    try:
        table_tag = soup.find('table', class_='tableFile', summary='Data Files')
        rows = table_tag.find_all('tr')
        time.sleep(.01)
        for row in rows:
            cells = row.find_all('td')
            if len(cells) > 3:
                if 'INS' in cells[3].text:
                    xbrl_link = 'https://www.sec.gov' + cells[2].a['href']
                    report.update({'xbrl_link' : xbrl_link})
    except:
        report.update({'xbrl_link' : 'N/A'})

In [5]:
filings_df = pd.DataFrame(reports_list)
filings_df.head()
# print(reports_list[0])

Unnamed: 0,date,doc_link,filings,xbrl_link
0,2018-04-02,https://www.sec.gov/Archives/edgar/data/132638...,10-K,https://www.sec.gov/Archives/edgar/data/132638...
1,2017-03-27,https://www.sec.gov/Archives/edgar/data/132638...,10-K,https://www.sec.gov/Archives/edgar/data/132638...
2,2016-03-29,https://www.sec.gov/Archives/edgar/data/132638...,10-K/A,https://www.sec.gov/Archives/edgar/data/132638...
3,2016-03-28,https://www.sec.gov/Archives/edgar/data/132638...,10-K,https://www.sec.gov/Archives/edgar/data/132638...
4,2015-03-30,https://www.sec.gov/Archives/edgar/data/132638...,10-K,https://www.sec.gov/Archives/edgar/data/132638...


In [6]:
tag_name_list = []

lxml_ten_q = filings_df['xbrl_link'][0]

xbrl_resp = requests.get(lxml_ten_q)
xbrl_str = xbrl_resp.text

root = ET.fromstring(xbrl_str)


uri_dict = {'xbrl' :'http://www.xbrl.org/2003/instance',
           'sec' : 'http://xbrl.sec.gov/dei/2014-01-31', 
           'company' : 'http://www.gamestop.com/20180203',
           'gaap' : 'http://fasb.org/us-gaap/2017-01-31'}

In [17]:
gaap_keys = []
for child in root:
    tag_list = list(child.tag)
    uri_pos = tag_list.index('}')
    uri = ''.join(tag_list[1:uri_pos])
    name = ''.join(tag_list[int(uri_pos+1):len(tag_list)])
    if uri == uri_dict['gaap']:
        if name:
            try:
                decimals = int(child.attrib['decimals'])
                if decimals < 0:
                    data = int(child.text)
                else:
                    date = float(child.text)
                context_ref = child.attrib['contextRef']
                new_key = {'key' : name, 'context_ref' : context_ref,
                          'data' : data}
                gaap_keys.append(new_key)
            except:
                print(f'{name}')

BankOverdrafts
BusinessCombinationContingentConsiderationArrangementsDescription
BusinessCombinationGoodwillRecognizedDescription
BusinessCombinationGoodwillRecognizedDescription
CashAndCashEquivalentsPolicyTextBlock
CashAndCashEquivalentsPolicyTextBlock
CommonStockVotingRights
DebtInstrumentBasisSpreadOnVariableRate1
DebtInstrumentBasisSpreadOnVariableRate1
DebtInstrumentCovenantDescription
DebtInstrumentInterestRateStatedPercentage
DebtInstrumentInterestRateStatedPercentage
DividendsPayableDateDeclaredDayMonthAndYear
EffectiveIncomeTaxRateReconciliationAtFederalStatutoryIncomeTaxRate
EffectiveIncomeTaxRateReconciliationAtFederalStatutoryIncomeTaxRate
EffectiveIncomeTaxRateReconciliationAtFederalStatutoryIncomeTaxRate
EffectiveIncomeTaxRateReconciliationChangeInDeferredTaxAssetsValuationAllowance
EffectiveIncomeTaxRateReconciliationChangeInDeferredTaxAssetsValuationAllowance
EffectiveIncomeTaxRateReconciliationChangeInDeferredTaxAssetsValuationAllowance
EffectiveIncomeTaxRateReconcili

In [18]:
gaap_names_df = pd.DataFrame(gaap_keys)
gaap_names_df.to_csv(f'../Data/{ticker}_key.csv', index=False)
gaap_names_df.head()

Unnamed: 0,context_ref,data,key
0,FI2016Q4,616600000,AccountsPayableCurrent
1,FI2017Q4,902000000,AccountsPayableCurrent
2,FI2016Q4,52000000,AccrualForTaxesOtherThanIncomeTaxesCurrent
3,FI2017Q4,63400000,AccrualForTaxesOtherThanIncomeTaxesCurrent
4,FI2016Q4,54000000,AccruedIncomeTaxesCurrent


In [16]:
for entry in root.findall('gaap:CommonStockDividendsPerShareCashPaid', uri_dict):
    print(entry.attrib)

{'contextRef': 'D2018Q1Dividend_us-gaap_StatementClassOfStockAxis_us-gaap_CommonClassAMember_us-gaap_SubsequentEventTypeAxis_us-gaap_SubsequentEventMember', 'decimals': '2', 'id': 'Fact-60A29EE1147605CC1F402C9B31120356', 'unitRef': 'usdPerShare'}
{'contextRef': 'FD2015Q4YTD_us-gaap_StatementClassOfStockAxis_us-gaap_CommonClassAMember', 'decimals': '2', 'id': 'Fact-E34CA8206F2C69E7EEA82C9B31026D21', 'unitRef': 'usdPerShare'}
{'contextRef': 'FD2016Q4YTD_us-gaap_StatementClassOfStockAxis_us-gaap_CommonClassAMember', 'decimals': '2', 'id': 'Fact-9943B877BA2EC43E84252C9B310242F1', 'unitRef': 'usdPerShare'}
{'contextRef': 'FD2017Q4YTD_us-gaap_StatementClassOfStockAxis_us-gaap_CommonClassAMember', 'decimals': '2', 'id': 'Fact-DFA5C0161F56DF3ABFCC2C9B3102D026', 'unitRef': 'usdPerShare'}


In [None]:
for child in root:
    if child.tag == '{http://fasb.org/us-gaap/2017-01-31}Assets':
        country_list = list(child.attrib['contextRef'])
        country_pos = len(country_list)
        country = ''.join(country_list[country_pos-2:country_pos])
        report = ''.join(country_list[0:8])
        if country == 'US':
            print(report)
            print(child.text)
        
    

In [None]:
pprinted = pp.PrettyPrinter(indent=4)
pprinted.pprint(root.findall('{http://fasb.org/us-gaap/2017-01-31}Assets'))

In [None]:
'dei:currentfiscalyearenddate'
'dei:entitycommonstocksharesoutstanding'
'dei:entityfilercategory'
'dei:entityregistrantname'
'dei:tradingsymbol'