In [1]:
import re
import json
import requests
import pandas as pd
from time import sleep

import warnings
# Install a blanket "ignore" filter: no warning of any category will be shown
# in cell output from here on (this includes pandas deprecation and
# performance warnings, so problems they would flag stay hidden).
warnings.filterwarnings('ignore')

In [2]:
def get_response(url, timeout=30):
    """Download ``url`` and return its body decoded from JSON.

    The request is sent with a browser-like ``User-Agent`` header.

    Parameters
    ----------
    url : str
        Address of the JSON resource to fetch.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (passed straight to
        ``requests``). Defaults to 30 so a stalled connection cannot hang the
        notebook indefinitely.

    Returns
    -------
    dict or list
        The parsed JSON payload.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.RequestException
        On connection problems or when the timeout expires.
    ValueError
        If the response body is not valid JSON.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'
    }

    # The context manager releases the session's pooled connections even if
    # the request or the JSON decoding raises.
    with requests.Session() as session:
        response = session.get(url, headers=headers, timeout=timeout)
        # Fail loudly on HTTP errors instead of trying to JSON-decode an
        # error page, which would surface as a confusing decode error.
        response.raise_for_status()
        return response.json()

In [3]:
def cik_normalize(cik):
    """Return ``cik`` as text, left-padded with zeros to at least 10 characters.

    Values whose text form is already 10 characters or longer are returned
    unchanged (apart from the conversion to ``str``).
    """
    cik_text = str(cik)
    return cik_text.zfill(10)

def camelcase_to_title(txt):
    """Convert CamelCase identifiers into space-separated, title-cased labels.

    A space is inserted at every lower-case → upper-case boundary and at
    every letter ↔ digit boundary, so embedded numbers become separate words
    (``'ActOf2017Percent'`` → ``'Act Of 2017 Percent'``). The result is then
    passed through ``str.title()``.

    Parameters
    ----------
    txt : iterable of str
        The identifiers to convert. Note that a single string is itself an
        iterable of characters, so wrap a lone name in a list.

    Returns
    -------
    list of str
        One label per input item, in the same order.
    """
    # Zero-width boundaries: lower→Upper, letter→digit, digit→letter.
    boundary = re.compile(
        r"(?<=[a-z])(?=[A-Z])|(?<=[A-Za-z])(?=[0-9])|(?<=[0-9])(?=[A-Za-z])"
    )
    return [boundary.sub(" ", name).title() for name in txt]

In [4]:
# SEC's public ticker list: a JSON object with one entry per company.
url = "https://www.sec.gov/files/company_tickers.json"
response = get_response(url)

# Only the per-company records matter; the outer keys of the payload are dropped.
company_records = list(response.values())
tickers_cik = pd.json_normalize(company_records)

tickers_cik.head()

Unnamed: 0,cik_str,ticker,title
0,320193,AAPL,Apple Inc.
1,789019,MSFT,MICROSOFT CORP
2,1652044,GOOGL,Alphabet Inc.
3,1318605,TSLA,"Tesla, Inc."
4,1067983,BRK-B,BERKSHIRE HATHAWAY INC


In [5]:
# Work on a fresh frame so `tickers_cik` keeps the values as downloaded;
# the CIK column is rewritten as zero-padded text via `cik_normalize`.
companies = tickers_cik.assign(
    cik_str=tickers_cik['cik_str'].map(cik_normalize)
)
companies.head()

Unnamed: 0,cik_str,ticker,title
0,320193,AAPL,Apple Inc.
1,789019,MSFT,MICROSOFT CORP
2,1652044,GOOGL,Alphabet Inc.
3,1318605,TSLA,"Tesla, Inc."
4,1067983,BRK-B,BERKSHIRE HATHAWAY INC


In [6]:
# Company to analyse, identified by ticker rather than by row position: the
# ordering of the SEC ticker list is not under our control, so a positional
# lookup would silently switch companies whenever the list is reordered.
ticker = "GOOGL"

ticker_matches = companies.loc[companies['ticker'] == ticker, 'cik_str']
if ticker_matches.empty:
    raise ValueError(f"Ticker {ticker!r} not found in the SEC company list")
cik = ticker_matches.iloc[0]

# XBRL "company facts" endpoint, addressed by the zero-padded CIK.
url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{cik}.json"

response = get_response(url)
# Keep only the us-gaap taxonomy: a mapping of tag name -> fact description.
data = response["facts"]["us-gaap"]

In [7]:
# View over the tag names stored in `data` (the us-gaap section of the
# company-facts response); it is a live dict view, not a list snapshot.
keys = data.keys()

In [8]:
# Tags that have at least one series reported in the 'USD' unit.
usd_keys = [
    tag
    for tag in keys
    if 'USD' in data[tag]['units']
]
usd_keys

['AccountsPayableCurrent',
 'AccountsReceivableNetCurrent',
 'AccruedIncomeTaxesCurrent',
 'AccruedIncomeTaxesNoncurrent',
 'AccruedLiabilitiesCurrent',
 'AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment',
 'AccumulatedOtherComprehensiveIncomeLossNetOfTax',
 'AdjustmentsRelatedToTaxWithholdingForShareBasedCompensation',
 'AdjustmentsToAdditionalPaidInCapitalSharebasedCompensationRequisiteServicePeriodRecognitionValue',
 'AdjustmentsToAdditionalPaidInCapitalTaxEffectFromShareBasedCompensation',
 'AllocatedShareBasedCompensationExpense',
 'AllowanceForDoubtfulAccountsReceivable',
 'AllowanceForDoubtfulAccountsReceivableCurrent',
 'Assets',
 'AssetsCurrent',
 'AssetsNoncurrent',
 'AvailableForSaleDebtSecuritiesAccumulatedGrossUnrealizedGainBeforeTax',
 'AvailableForSaleDebtSecuritiesAccumulatedGrossUnrealizedLossBeforeTax',
 'AvailableForSaleDebtSecuritiesAmortizedCostBasis',
 'AvailableForSaleSecuritiesContinuousUnrealizedLossPosition12MonthsOrLongerAccumulatedLos

In [9]:
# Tags that have at least one series reported in the 'shares' unit.
shares_keys = [
    tag
    for tag in keys
    if 'shares' in data[tag]['units']
]
shares_keys

['CommonStockCapitalSharesReservedForFutureIssuance',
 'CommonStockSharesAuthorized',
 'CommonStockSharesIssued',
 'CommonStockSharesOutstanding',
 'PreferredStockSharesAuthorized',
 'PreferredStockSharesIssued',
 'PreferredStockSharesOutstanding',
 'ShareBasedCompensationArrangementByShareBasedPaymentAwardOptionsExercisableNumber',
 'ShareBasedCompensationArrangementByShareBasedPaymentAwardOptionsForfeituresAndExpirationsInPeriod',
 'ShareBasedCompensationArrangementByShareBasedPaymentAwardOptionsGrantsInPeriod',
 'ShareBasedCompensationArrangementByShareBasedPaymentAwardOptionsOutstandingNumber',
 'ShareBasedCompensationArrangementByShareBasedPaymentAwardOptionsVestedAndExpectedToVestExercisableNumber',
 'StockIssuedDuringPeriodSharesStockOptionsExercised']

In [10]:
# Tags that have at least one series reported in the 'USD/shares' unit.
usd_shares_keys = [
    tag
    for tag in keys
    if 'USD/shares' in data[tag]['units']
]
usd_shares_keys

['CommonStockParOrStatedValuePerShare',
 'EarningsPerShareBasic',
 'EarningsPerShareDiluted',
 'IncomeLossFromContinuingOperationsPerBasicShare',
 'IncomeLossFromContinuingOperationsPerDilutedShare',
 'PreferredStockParOrStatedValuePerShare',
 'ShareBasedCompensationArrangementByShareBasedPaymentAwardOptionsExercisableWeightedAverageExercisePrice',
 'ShareBasedCompensationArrangementByShareBasedPaymentAwardOptionsOutstandingWeightedAverageExercisePrice',
 'ShareBasedCompensationArrangementByShareBasedPaymentAwardOptionsVestedAndExpectedToVestExercisableWeightedAverageExercisePrice',
 'ShareBasedCompensationArrangementsByShareBasedPaymentAwardOptionsExercisesInPeriodWeightedAverageExercisePrice',
 'ShareBasedCompensationArrangementsByShareBasedPaymentAwardOptionsForfeituresInPeriodWeightedAverageExercisePrice']

In [11]:
# Tags that have at least one series reported in the 'pure' unit.
pure_keys = [
    tag
    for tag in keys
    if 'pure' in data[tag]['units']
]
pure_keys

['EffectiveIncomeTaxRateContinuingOperations',
 'EffectiveIncomeTaxRateReconciliationAtFederalStatutoryIncomeTaxRate',
 'EffectiveIncomeTaxRateReconciliationChangeInDeferredTaxAssetsValuationAllowance',
 'EffectiveIncomeTaxRateReconciliationChangeInEnactedTaxRate',
 'EffectiveIncomeTaxRateReconciliationForeignIncomeTaxRateDifferential',
 'EffectiveIncomeTaxRateReconciliationNondeductibleExpenseShareBasedCompensationCost',
 'EffectiveIncomeTaxRateReconciliationOtherAdjustments',
 'EffectiveIncomeTaxRateReconciliationOtherReconcilingItemsPercent',
 'EffectiveIncomeTaxRateReconciliationStateAndLocalIncomeTaxes',
 'EffectiveIncomeTaxRateReconciliationTaxCredits',
 'EffectiveIncomeTaxRateReconciliationTaxCreditsResearch',
 'EffectiveIncomeTaxRateReconciliationTaxCutsAndJobsActOf2017Percent',
 'EffectiveIncomeTaxRateReconciliationTaxCutsAndJobsActOf2017TransitionTaxOnAccumulatedForeignEarningsPercent',
 'OperatingLeaseWeightedAverageDiscountRatePercent',
 'ShortTermDebtWeightedAverageInteres

In [12]:
# Tags that have at least one series reported in the 'Year' unit.
year_keys = [
    tag
    for tag in keys
    if 'Year' in data[tag]['units']
]
year_keys

[]

In [13]:
# Tag lists and the unit each list was filtered on. The two sequences are
# parallel (same order) and are zipped together below.
keys_list = [
    usd_keys,
    shares_keys,
    usd_shares_keys,
    pure_keys,
    year_keys,
]

keys_groups = [
    'USD',
    'shares',
    'USD/shares',
    'pure',
    'Year',
]

# One Series of 10-K values per tag, indexed by the period end date.
annual_by_tag = {}

# The loop variable is deliberately NOT named `keys`: that name is bound at
# notebook level to `data.keys()`, and rebinding it here would break any
# later re-run of the cells that filter tags by unit.
for group, group_keys in zip(keys_groups, keys_list):
    for key in group_keys:
        key_data = data[key]['units'][group]
        tmp_full_df = pd.json_normalize(key_data)

        # Keep annual-report (10-K) facts only; when the same period end was
        # reported more than once, the last entry in the list wins.
        tmp_df = tmp_full_df[tmp_full_df['form'] == '10-K'].drop_duplicates('end', keep='last')
        annual_by_tag[key] = tmp_df.set_index('end')['val']

# Rows of the assembled frame are the period end dates of the first tag that
# has any 10-K data; every other tag is aligned to those dates, and values
# reported for other dates are dropped. (This is the alignment that inserting
# columns one at a time into an empty frame performed implicitly.)
report_dates = next(
    (series.index for series in annual_by_tag.values() if len(series)),
    pd.Index([]),
)

# Build the frame in a single step instead of growing it column by column.
full_df = pd.DataFrame(annual_by_tag, index=report_dates)

full_df.index.name = ''
# Transpose to one row per tag / one column per date and drop tags that have
# no value on any of the report dates.
full_df = full_df.T.dropna(how='all')
full_df

Unnamed: 0,2014-12-31,2015-12-31,2016-12-31,2017-12-31,2018-12-31,2019-12-31,2020-12-31,2021-12-31
AccountsPayableCurrent,1.715000e+09,1.931000e+09,2.041000e+09,3.137000e+09,4.378000e+09,5.561000e+09,5.589000e+09,6.037000e+09
AccountsReceivableNetCurrent,9.383000e+09,1.155600e+10,1.413700e+10,1.833600e+10,2.083800e+10,2.532600e+10,3.093000e+10,3.930400e+10
AccruedIncomeTaxesCurrent,9.600000e+07,3.020000e+08,5.540000e+08,8.810000e+08,6.900000e+07,2.740000e+08,1.485000e+09,8.080000e+08
AccruedIncomeTaxesNoncurrent,,,,1.281200e+10,1.132700e+10,9.885000e+09,8.849000e+09,9.176000e+09
AccruedLiabilitiesCurrent,4.408000e+09,4.768000e+09,6.144000e+09,1.017700e+10,1.695800e+10,2.306700e+10,2.863100e+10,3.123600e+10
...,...,...,...,...,...,...,...,...
EffectiveIncomeTaxRateReconciliationTaxCreditsResearch,1.800000e-02,2.100000e-02,2.000000e-02,1.800000e-02,2.400000e-02,2.500000e-02,2.300000e-02,1.600000e-02
EffectiveIncomeTaxRateReconciliationTaxCutsAndJobsActOf2017Percent,,,,,-1.300000e-02,-6.000000e-03,0.000000e+00,
EffectiveIncomeTaxRateReconciliationTaxCutsAndJobsActOf2017TransitionTaxOnAccumulatedForeignEarningsPercent,,,,3.760000e-01,-1.000000e-03,-6.000000e-03,,
OperatingLeaseWeightedAverageDiscountRatePercent,,,,,,2.800000e-02,2.600000e-02,2.300000e-02


In [14]:
# Right-most column of `full_df`, restricted to tags that have a value there.
latest_column = full_df.iloc[:, -1]
last_year_data = latest_column.dropna()

# Names of the tags with a value in that column.
last_year_data.index.tolist()

['AccountsPayableCurrent',
 'AccountsReceivableNetCurrent',
 'AccruedIncomeTaxesCurrent',
 'AccruedIncomeTaxesNoncurrent',
 'AccruedLiabilitiesCurrent',
 'AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment',
 'AccumulatedOtherComprehensiveIncomeLossNetOfTax',
 'AdjustmentsToAdditionalPaidInCapitalSharebasedCompensationRequisiteServicePeriodRecognitionValue',
 'AllocatedShareBasedCompensationExpense',
 'AllowanceForDoubtfulAccountsReceivable',
 'Assets',
 'AssetsCurrent',
 'AvailableForSaleSecuritiesDebtMaturitiesAfterFiveThroughTenYearsFairValue',
 'AvailableForSaleSecuritiesDebtMaturitiesAfterOneThroughFiveYearsFairValue',
 'AvailableForSaleSecuritiesDebtMaturitiesAfterTenYearsFairValue',
 'AvailableForSaleSecuritiesDebtMaturitiesWithinOneYearFairValue',
 'AvailableForSaleSecuritiesDebtSecurities',
 'CashAndCashEquivalentsAtCarryingValue',
 'CashAndCashEquivalentsFairValueDisclosure',
 'CashCashEquivalentsAndShortTermInvestments',
 'CashCashEquivalentsRestrictedC

In [15]:
# Last five columns of `full_df`, without rows that are empty across all of
# them. `dropna` returns a new frame, so relabelling the index below leaves
# `full_df` untouched.
last_five_columns = full_df.iloc[:, -5:]
df_5y = last_five_columns.dropna(how='all')

# Replace the CamelCase tag names with human-readable labels.
df_5y.index = camelcase_to_title(df_5y.index)
df_5y

Unnamed: 0,2017-12-31,2018-12-31,2019-12-31,2020-12-31,2021-12-31
Accounts Payable Current,3.137000e+09,4.378000e+09,5.561000e+09,5.589000e+09,6.037000e+09
Accounts Receivable Net Current,1.833600e+10,2.083800e+10,2.532600e+10,3.093000e+10,3.930400e+10
Accrued Income Taxes Current,8.810000e+08,6.900000e+07,2.740000e+08,1.485000e+09,8.080000e+08
Accrued Income Taxes Noncurrent,1.281200e+10,1.132700e+10,9.885000e+09,8.849000e+09,9.176000e+09
Accrued Liabilities Current,1.017700e+10,1.695800e+10,2.306700e+10,2.863100e+10,3.123600e+10
...,...,...,...,...,...
Effective Income Tax Rate Reconciliation State And Local Income Taxes,1.000000e-03,-4.000000e-03,1.100000e-02,1.100000e-02,1.000000e-02
Effective Income Tax Rate Reconciliation Tax Credits Research,1.800000e-02,2.400000e-02,2.500000e-02,2.300000e-02,1.600000e-02
Effective Income Tax Rate Reconciliation Tax Cuts And Jobs Act Of2017Percent,,-1.300000e-02,-6.000000e-03,0.000000e+00,
Effective Income Tax Rate Reconciliation Tax Cuts And Jobs Act Of2017Transition Tax On Accumulated Foreign Earnings Percent,3.760000e-01,-1.000000e-03,-6.000000e-03,,


In [16]:
# Rows whose label contains 'Cash' (case-sensitive), then the right-most
# column of those rows with missing values removed.
is_cash_row = df_5y.index.str.contains('Cash')
cash_rows = df_5y.loc[is_cash_row]
cash_rows.iloc[:, -1].dropna()

Cash And Cash Equivalents At Carrying Value                                                                                      2.094500e+10
Cash And Cash Equivalents Fair Value Disclosure                                                                                  7.499000e+09
Cash Cash Equivalents And Short Term Investments                                                                                 1.396490e+11
Cash Cash Equivalents Restricted Cash And Restricted Cash Equivalents                                                            2.094500e+10
Cash Cash Equivalents Restricted Cash And Restricted Cash Equivalents Period Increase Decrease Including Exchange Rate Effect   -5.520000e+09
Derivative Collateral Obligation To Return Cash                                                                                  3.940000e+08
Derivative Collateral Right To Reclaim Cash                                                                                      4.000000e+06
Effect

In [17]:
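# SEC XBRL "frames" API: one value per company for a given tag, unit and period.
# URL template:
# https://data.sec.gov/api/xbrl/frames/us-gaap/[tag]/USD/[frame].json

# Example (tag AccountsPayableCurrent, frame CY2022Q1I):
# https://data.sec.gov/api/xbrl/frames/us-gaap/AccountsPayableCurrent/USD/CY2022Q1I.json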