In [1]:
import pandas as pd

In [2]:
# load data
num_df = pd.read_csv("./Data/SEC Statements 2023q1/num.txt", delimiter="\t")
sub_df = pd.read_csv("./Data/SEC Statements 2023q1/sub.txt", delimiter="\t")

# format dates
num_df["ddate"] = pd.to_datetime(num_df["ddate"], format="%Y%m%d")
num_df["year"] = num_df["ddate"].dt.year

## Explore One Entity

In [70]:
# TESLA has 3 unique adsh, each differing by the columns shown
view = sub_df[sub_df.name.str.contains("TESLA")].T
select = view.apply(lambda x: x.nunique() > 1, axis=1)
view[select]

Unnamed: 0,20470,22013,25901
adsh,0000950170-23-001409,0001564590-23-000799,0001564590-23-000002
wksi,1,0,0
form,10-K,8-K,8-K
period,20221231.0,20230131.0,20221231.0
filed,20230131,20230125,20230103
accepted,2023-01-30 21:29:00.0,2023-01-25 16:41:00.0,2023-01-03 06:09:00.0
detail,1,0,0
instance,tsla-20221231_htm.xml,tsla-8k_20230120_htm.xml,tsla-8k_20230102_htm.xml


In [72]:
view

Unnamed: 0,20470,22013,25901
adsh,0000950170-23-001409,0001564590-23-000799,0001564590-23-000002
cik,1318605,1318605,1318605
name,"TESLA, INC.","TESLA, INC.","TESLA, INC."
sic,3711.0,3711.0,3711.0
countryba,US,US,US
stprba,CA,CA,CA
cityba,PALO ALTO,PALO ALTO,PALO ALTO
zipba,94304,94304,94304
bas1,3500 DEER CREEK RD,3500 DEER CREEK RD,3500 DEER CREEK RD
bas2,,,


In [48]:
# filter for the adsh corresponding to 10-K filing
adsh = "0000950170-23-001409"
counts = num_df.query("adsh == @adsh & ddate == '2022-12-31'").value_counts("tag")
counts

tag
AccountsAndNotesReceivableNet                                     1
NetIncomeLossAvailableToCommonStockholdersDiluted                 1
NumberOfCustomersRepresentAccountReceivableThresholdPercentage    1
NotesReceivableNet                                                1
NotesAndLoansReceivableNetCurrent                                 1
                                                                 ..
FinanceLeaseInterestPaymentOnLiability                            1
FinanceLeaseInterestExpense                                       1
FinanceLeaseExpense                                               1
FairValueNetAssetLiability                                        1
WeightedAverageNumberOfSharesOutstandingBasic                     1
Length: 319, dtype: int64

In [67]:
view = (num_df
 .query("adsh == @adsh")
 .sort_values("tag")
 .groupby("tag")
 .tail(1)
 .pivot(values="value", columns=["tag"], index=["adsh"])
 .T)

for row in view.iterrows():
    print(row[0], row[1][0])

AccountsAndNotesReceivableNet 280000000.0
AccountsPayableCurrent 15255000000.0
AccountsReceivableNetCurrent 1913000000.0
AccountsReceivableThresholdPercentage 0.1
AccruedAndOtherCurrentLiabilities 7142000000.0
AccruedPurchases 2747000000.0
AccruedWarrantyReserveCurrentPortion 703000000.0
AccruedWarrantyReserveNoncurrent 1398000000.0
AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment 9041000000.0
AccumulatedOtherComprehensiveIncomeLossNetOfTax 54000000.0
AdditionalPaidInCapital 27260000000.0
AdditionalPaidInCapitalCommonStock 29803000000.0
AdjustmentsToAdditionalPaidInCapitalSharebasedCompensationRequisiteServicePeriodRecognitionValue 1806000000.0
AllocatedShareBasedCompensationExpense 1734000000.0
Assets 62131000000.0
AssetsCurrent 27100000000.0
AvailableForSaleDebtSecuritiesGrossUnrealizedGain 1000000.0
AvailableForSaleDebtSecuritiesGrossUnrealizedLoss 1000000.0
AvailableForSaleOfSecuritiesAmortizedCost 22209000000.0
AvailableForSaleSecuritiesDebtMaturitiesNextRo

## Explore All Entities

In [20]:
# get entity names that have filed one 10-K form
entity_names = sub_df.query("form == '10-K'").value_counts("name")
entity_names = entity_names[entity_names == 1].index

# get the corresponding Accession Numbers (id assigned to each EDGAR submission)
adsh_list = sub_df[sub_df.name.isin(entity_names)].adsh
print(len(adsh_list))

19545


In [44]:
# making sure all tags (or most) occurs once
queries = ["adsh in @adsh_list",
           "coreg.isnull()"
          ]
tag_counts = num_df.query(" & ".join(queries)).value_counts(["adsh", "tag"]).sort_values(ascending=False)
tag_counts[tag_counts > 1]

adsh                  tag                                                 
0001580345-23-000008  EarningsPerShareBasicDistributed                        42
0000726728-23-000044  CommonStockDividendsPerShareCashPaid                    39
0001577134-23-000008  StockIssuedDuringPeriodValueDividendReinvestmentPlan    23
                      PaymentsOfDividendsCommonStock                          23
                      PaymentsOfDividends                                     23
                                                                              ..
0001477932-23-000591  AllowanceForDoubtfulAccountsReceivable                   2
                      CashAndCashEquivalentsAtCarryingValue                    2
0001477932-23-000586  WeightedAverageNumberOfShareOutstandingBasic             2
0001628280-23-009556  OperatingLeaseRightOfUseAssetAmortizationExpense         2
0001477932-23-000586  WeightedAverageNumberOfDilutedShareOutstanding           2
Length: 888626, dtype: int64

In [56]:
data = (num_df
        .query(" & ".join(queries))
        .sort_values("ddate")
        .groupby(["adsh", "tag"])
        .tail(1)
        .pivot(index=["adsh"], columns=["tag"], values="value"))

KeyboardInterrupt: 

In [46]:
data.isna().mean(axis=0).sort_values()

tag
NetIncomeLoss                                                                                       0.076038
Assets                                                                                              0.190013
LiabilitiesAndStockholdersEquity                                                                    0.194715
NetCashProvidedByUsedInOperatingActivities                                                          0.201524
NetCashProvidedByUsedInFinancingActivities                                                          0.213035
                                                                                                      ...   
AccruedInterestExpensesNonCurrent                                                                   1.000000
DeferredIncomeTaxesReleaseOfValuationAllowance                                                      1.000000
RetirementOfShares                                                                                  1.000000
CurrentIncomeTa

In [47]:
data.loc["0000950170-23-001409"]

tag
A1031ExchangeFundsForBusinessCombinations        NaN
A155SeniorNotesDue2026                           NaN
A199AQualifiedBusinessIncome                     NaN
A2.30seniornotesdue2024                          NaN
A2.95seniornotesdue2029                          NaN
                                                  ..
warrantsToPurchaseSharesOfCommonStock            NaN
weightedAverageTradingPriceOfTheOrdinaryShares   NaN
workingCapital                                   NaN
workingCapitalDeficit                            NaN
workingCapitalLoans                              NaN
Name: 0000950170-23-001409, Length: 157451, dtype: float64