# Obtaining SEC Financial Data

Change the names of the companies in the list below to select which ones are analyzed.

In [103]:
import pandas as pd
import numpy as np
import datetime
from secfsdstools.update import update
from secfsdstools.c_index.companyindexreading import CompanyIndexReader
from secfsdstools.c_index.searching import IndexSearch
from secfsdstools.e_collector.reportcollecting import SingleReportCollector
from secfsdstools.e_filter.rawfiltering import ReportPeriodRawFilter
from secfsdstools.e_presenter.presenting import StandardStatementPresenter

# Keep secfsdstools quiet: only records at ERROR level or above are emitted
import logging
logging.getLogger('secfsdstools').setLevel(logging.ERROR)

# Run the secfsdstools update step before anything queries the index
print("Updating SEC DB...")
update()
print("---Done.")

# Company names that are resolved to CIKs further down; edit this list to
# change which companies are analyzed
companyNames = ["Apple Inc", "Johnson & Johnson", "JPMorgan Chase",
                "Exxon", "Lockheed Martin", "NVIDIA CORP"]



Updating SEC DB...
---Done.


### Company Class, used to store all information associated with company

In [104]:
## Company Class: Stores information from a given CIK
class Company:
    """Holds the report index reader and the collected filing data of one company.

    Attributes:
    - cik: the identifier passed to the constructor
    - report_reader: the CompanyIndexReader obtained for that cik
    - consolidated_filings: DataFrame with one row per (tag, stmt) pair and one
      column per filing date added through appendFilings
    - secLinks: dict mapping a period to the URL stored through addSecFormLink
    """

    def __init__(self, cik):
        """Create the reader for `cik` and start with empty filing data."""
        self.cik = cik
        self.report_reader = CompanyIndexReader.get_company_index_reader(cik=self.cik)
        # Both key columns exist from the start so the empty table already has
        # the schema that appendFilings maintains.
        self.consolidated_filings = pd.DataFrame(columns=['tag', 'stmt'])
        self.secLinks = dict()

    def get_cik(self):
        """Return the CIK this object was created with."""
        return self.cik

    def get_report_reader(self):
        """Return the CompanyIndexReader created in the constructor."""
        return self.report_reader

    def getAvailableReports(self):
        """Return the distinct values of the 'form' column of all company reports as a list."""
        return list(self.report_reader.get_all_company_reports_df()['form'].unique())

    def getFilingList(self, reportType, startDate, endDate):
        """
        Return the company's reports whose period lies in [startDate, endDate].

        Parameters:
        - reportType: 'All' for every form, otherwise the value forwarded as
          `forms=` to the report reader (the notebook passes a list such as
          ['10-K', '10-Q'])
        - startDate, endDate: inclusive bounds compared against the 'period'
          column (the notebook passes YYYYMMDD integers)
        """
        if reportType == 'All':
            unfilteredDF = self.report_reader.get_all_company_reports_df()
        else:
            unfilteredDF = self.report_reader.get_all_company_reports_df(forms=reportType)

        filteredDF = unfilteredDF[(unfilteredDF.period >= startDate) & (unfilteredDF.period <= endDate)]
        return filteredDF

    def getConsolidatedFilings(self):
        """Return the consolidated (tag, stmt) x filing-date DataFrame."""
        return self.consolidated_filings

    def appendFilings(self, df, filingDate):
        """
        Appends new filings data to the consolidated_filings DataFrame from a dataframe input.
        Includes both tag and stmt columns.

        Rows for (tag, stmt) pairs not seen before are appended in the order in
        which they first appear in `df`, so the resulting row order is the same
        on every run. When `df` contains the same pair more than once, the last
        'merged' value is the one stored. Existing filing-date columns are
        always kept, even when an earlier filing contributed no rows.

        Parameters:
        - df: pandas DataFrame containing 'tag', 'stmt', and 'merged' columns
        - filingDate: str representing the filing date to be used as column name

        Raises:
        - KeyError: if `df` lacks one of the required columns
        """
        key_columns = ['tag', 'stmt']

        # Make sure both key columns exist without throwing away columns that
        # were already collected (a table with zero rows may still carry
        # filing-date columns from a filing that had no data).
        for column in key_columns:
            if column not in self.consolidated_filings.columns:
                self.consolidated_filings[column] = None

        incoming_keys = list(zip(df['tag'], df['stmt']))

        # Collect unseen pairs in first-appearance order; the set is only used
        # for membership tests, never for ordering.
        seen = set(zip(self.consolidated_filings['tag'], self.consolidated_filings['stmt']))
        fresh_keys = []
        for key in incoming_keys:
            if key not in seen:
                seen.add(key)
                fresh_keys.append(key)

        # Add new tag-stmt combinations if any
        if fresh_keys:
            new_rows = pd.DataFrame(fresh_keys, columns=key_columns)
            self.consolidated_filings = pd.concat([self.consolidated_filings, new_rows], ignore_index=True)

        # Create the filing date column if it doesn't exist
        if filingDate not in self.consolidated_filings.columns:
            self.consolidated_filings[filingDate] = None

        # One pass over the table: take the reported value when the pair is in
        # this filing, otherwise keep whatever the column already held.
        reported = dict(zip(incoming_keys, df['merged']))
        table_keys = zip(self.consolidated_filings['tag'], self.consolidated_filings['stmt'])
        current_values = self.consolidated_filings[filingDate]
        updated_values = [reported.get(key, old) for key, old in zip(table_keys, current_values)]
        self.consolidated_filings[filingDate] = pd.Series(
            updated_values, index=self.consolidated_filings.index, dtype=object
        )

        # Replace all NaN with None
        self.consolidated_filings = self.consolidated_filings.replace({np.nan: None})

        return

    def addSecFormLink(self, period, url):
        """Remember `url` as the filing link for `period` (overwrites an existing entry)."""
        self.secLinks[period] = url
        return

    def getSecLink(self):
        """Return the dict of all stored period -> URL links."""
        return self.secLinks

Create Company objects for all of the companies to be analyzed

In [105]:
#Determine Company CIK from Name
# companyObjDict maps each entry of companyNames to its Company object. The
# key is always the requested name so that later lookups such as
# companyObjDict[companyNames[0]] work no matter how many matches were found.
companyObjDict = dict()
index_search = IndexSearch.get_index_search()
for c in companyNames:
    results = index_search.find_company_by_name(c)

    # Nothing found: report it and move on instead of failing on iloc below
    if len(results) == 0:
        print("No CIK found for company name {}, skipping.".format(c))
        continue

    if len(results) == 1:
        selected = results.iloc[0]
    else:
        print("-------------------------------------------------")
        print("Multiple CIK for company name {} found:".format(c))
        # Show positional numbers because the choice is resolved with iloc;
        # the DataFrame's own index labels need not start at 0.
        for position, (_, row) in enumerate(results.iterrows()):
            print(position, row['cik'], row['name'])
        selectedIndex = int(input("Select company index from list: "))
        selected = results.iloc[selectedIndex]

    print("CIK for {} : {}".format(c, selected['cik']))
    companyObjDict[c] = Company(cik=selected['cik'])


CIK for Apple Inc : 320193
CIK for Johnson & Johnson : 200406
CIK for JPMorgan Chase : 19617
CIK for Exxon : 34088
CIK for Lockheed Martin : 936468
CIK for NVIDIA CORP : 1045810


Function used to select only the current period data from a given report

In [106]:
def select_value(row):
    """Pick a single value from the non-missing entries of `row`.

    With exactly one or exactly two non-missing entries the last of them is
    returned (for a single entry that is the entry itself). Any other count,
    including zero, yields None.
    """
    present = [value for value in row if pd.notna(value)]
    if len(present) in (1, 2):
        return present[-1]
    return None

Obtain all 10-Q/10-K reports for each of the companies. We focus only on the income statement, because the purpose of this paper is to determine the impact of revenue drivers.

In [107]:
#Process numerical financial information using 10K/Q
for name, obj in companyObjDict.items():
    # All 10-K/10-Q filings up to today, oldest period first, so the
    # filing-date columns are added in chronological order.
    today_as_int = int(datetime.date.today().strftime('%Y%m%d'))
    filingList = obj.getFilingList(
        reportType=['10-K', '10-Q'],
        startDate=0,
        endDate=today_as_int,
    ).sort_values('period', ascending=True)
    print("Company {} has {} available 10K/Q reports, processing...".format(name, filingList.shape[0]))

    for row in filingList.itertuples():
        # Collect only the income statement ('IS') of this report
        collector: SingleReportCollector = SingleReportCollector.get_report_by_adsh(
            adsh=row.adsh, stmt_filter=['IS']
        )
        rawdatabag = collector.collect()

        # Filter to the report period, join, and render as a standard statement
        period_only = rawdatabag.filter(ReportPeriodRawFilter())
        df = period_only.join().present(StandardStatementPresenter())

        # Every column to the right of 'inpth' is handed to select_value,
        # which reduces each row to a single number
        # (presumably these are the per-duration value columns - TODO confirm)
        first_value_position = df.columns.get_loc('inpth') + 1
        cols_after_inpth = df.loc[:, df.columns[first_value_position:]]
        df['merged'] = cols_after_inpth.apply(select_value, axis=1)

        # Store the merged column under the report period, parsed to a datetime
        period_as_datetime = datetime.datetime.strptime(str(row.period), '%Y%m%d')
        obj.appendFilings(df, period_as_datetime)

        # Keep the filing URL for reference, keyed by the raw period value
        obj.addSecFormLink(row.period, row.url)

    print("---Done Processing----")

Company Apple Inc has 61 available 10K/Q reports, processing...
Company Johnson & Johnson has 61 available 10K/Q reports, processing...
Company JPMorgan Chase has 61 available 10K/Q reports, processing...
Company Exxon has 61 available 10K/Q reports, processing...
Company Lockheed Martin has 61 available 10K/Q reports, processing...
Company NVIDIA CORP has 61 available 10K/Q reports, processing...


#### Note:

Sales and revenue numbers are sometimes not reported under the same tag. Use the snippets below to investigate:
exampleFilings = companyObjDict[companyNames[0]].getConsolidatedFilings()
exampleFilings[exampleFilings['tag'].str.contains('revenue', case=False, na=False)]

exampleFilings[exampleFilings['tag'].str.contains('sales', case=False, na=False)]

secLinks = companyObjDict[companyNames[0]].getSecLink()
secLinks[20181231]

## Printout of Income Statement for first company

In [108]:
# Display the consolidated table of the first company in companyNames
first_company = companyObjDict[companyNames[0]]
first_company.getConsolidatedFilings()

Unnamed: 0,tag,stmt,2009-06-30 00:00:00,2009-09-30 00:00:00,2009-12-31 00:00:00,2010-03-31 00:00:00,2010-06-30 00:00:00,2010-09-30 00:00:00,2010-12-31 00:00:00,2011-03-31 00:00:00,...,2022-03-31 00:00:00,2022-06-30 00:00:00,2022-09-30 00:00:00,2022-12-31 00:00:00,2023-03-31 00:00:00,2023-06-30 00:00:00,2023-09-30 00:00:00,2023-12-31 00:00:00,2024-03-31 00:00:00,2024-06-30 00:00:00
0,WeightedAverageNumberOfDilutedSharesOutstanding,IS,909160000.0,907005000.0,919783000.0,922878000.0,927361000.0,924712000.0,933154000.0,935944000.0,...,16403316000.0,16262203000.0,16325819000.0,15955718000.0,15847050000.0,15775021000.0,15812547000.0,15576641000.0,15464709000.0,15348175000.0
1,NetIncomeLoss,IS,1229000000.0,5704000000.0,3378000000.0,3074000000.0,3253000000.0,4308000000.0,6004000000.0,5987000000.0,...,25010000000.0,19442000000.0,99803000000.0,29998000000.0,24160000000.0,19881000000.0,96995000000.0,33916000000.0,23636000000.0,21448000000.0
2,EarningsPerShareBasic,IS,1.38,6.39,3.74,3.39,3.57,4.71,6.53,6.49,...,1.54,1.2,6.15,1.89,1.53,1.27,6.16,2.19,1.53,1.4
3,SalesRevenueNet,IS,8337000000.0,36537000000.0,15683000000.0,13499000000.0,15700000000.0,20343000000.0,26741000000.0,24667000000.0,...,,,,,,,,,,
4,OperatingExpenses,IS,1351000000.0,5482000000.0,1686000000.0,1646000000.0,1902000000.0,7299000000.0,2471000000.0,2344000000.0,...,12580000000.0,12809000000.0,51345000000.0,14316000000.0,13658000000.0,13415000000.0,54847000000.0,14482000000.0,14371000000.0,14326000000.0
5,WeightedAverageNumberOfSharesOutstandingBasic,IS,893712000.0,893016000.0,903542000.0,907548000.0,912197000.0,909461000.0,919294000.0,923196000.0,...,16278802000.0,16162945000.0,16215963000.0,15892723000.0,15787154000.0,15697614000.0,15744231000.0,15509763000.0,15405856000.0,15287521000.0
6,OperatingIncomeLoss,IS,1672000000.0,7658000000.0,4725000000.0,3979000000.0,4234000000.0,18385000000.0,7827000000.0,7874000000.0,...,29979000000.0,23076000000.0,119437000000.0,36016000000.0,28318000000.0,22998000000.0,114301000000.0,40373000000.0,27900000000.0,25352000000.0
7,IncomeTaxExpenseBenefit,IS,503000000.0,2280000000.0,1380000000.0,955000000.0,1039000000.0,4527000000.0,1959000000.0,1913000000.0,...,5129000000.0,3624000000.0,19300000000.0,5625000000.0,4222000000.0,2852000000.0,16741000000.0,6407000000.0,4422000000.0,4046000000.0
8,IncomeLossFromContinuingOperationsBeforeIncome...,IS,1732000000.0,7984000000.0,4758000000.0,4029000000.0,4292000000.0,18540000000.0,7963000000.0,7900000000.0,...,,,,,,,,,,
9,ResearchAndDevelopmentExpense,IS,341000000.0,1333000000.0,398000000.0,426000000.0,464000000.0,1782000000.0,575000000.0,581000000.0,...,6387000000.0,6797000000.0,26251000000.0,7709000000.0,7457000000.0,7442000000.0,29915000000.0,7696000000.0,7903000000.0,8006000000.0


# Obtaining Macroeconomic indicators for years of company reports

The key OECD macroeconomic indicators that significantly impact company revenues:

1. GDP Growth Rate
- Direct impact on consumer spending and business investment
- Leading indicator of market expansion/contraction

2. Interest Rates
- Affects borrowing costs and investment decisions
- Influences consumer spending on credit-dependent purchases

3. Inflation Rate (CPI)
- Impacts purchasing power and consumer spending
- Influences pricing strategies and profit margins

4. Employment Rate
- Drives consumer confidence and spending power
- Affects labor costs and workforce availability

5. Industrial Production Index
- Indicates manufacturing sector health
- Supply chain implications

6. Consumer Confidence Index
- Predicts spending patterns
- Early indicator of demand changes

7. Trade Balance
- International market opportunities
- Exchange rate implications

8. Retail Sales
- Direct measure of consumer spending
- Sector-specific growth indicators
