In [6]:
import requests
import pandas as pd
import re
import time
from datetime import datetime

In [7]:
# Placeholder for the duration (in seconds) of the SEC ticker request; starts at 0.
# NOTE(review): getTickers also assigns a name `tickerTime` — confirm that it is
# meant to update this module-level variable.
tickerTime = 0

In [8]:
def getTickers(email, timeout=30):
    """
    Retrieve every ticker published by the SEC and return them in a DataFrame.

    Parameters
    ----------
    email : str
        Contact address sent as the ``User-Agent`` header of the request.
    timeout : float, optional
        Seconds to wait for the server before the request is abandoned
        (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``company_tickers.json`` with the columns
        ``"CIK"`` (zero-padded to 10 characters), ``"Ticker"`` and ``"Name"``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.

    Side effects
    ------------
    Stores the duration of the HTTP request, in seconds, in the module-level
    ``tickerTime`` variable.
    """
    # Without this declaration the assignment below only creates a local
    # variable and the module-level ``tickerTime`` is never updated.
    global tickerTime

    headers = {'User-Agent': f"{email}"}
    startTime = time.perf_counter()
    companyTickers = requests.get(
        "https://www.sec.gov/files/company_tickers.json",
        headers=headers,
        timeout=timeout,
        )
    tickerTime = time.perf_counter() - startTime

    # Fail with a clear HTTP error instead of a confusing JSON decode error
    # when the server rejects the request.
    companyTickers.raise_for_status()
    tickerDict = companyTickers.json()

    # Build all rows first and create the frame once; this does not depend on
    # the JSON keys being exactly "0".."n-1" and avoids cell-by-cell writes.
    records = [
        (str(entry['cik_str']).zfill(10), entry['ticker'], entry['title'])
        for entry in tickerDict.values()
    ]
    return pd.DataFrame(records, columns=["CIK", "Ticker", "Name"], dtype=object)

In [9]:
class Company:
    """
    Income-statement data for one company, read from the SEC XBRL
    "company facts" endpoint.

    Creating an instance looks the ticker up in ``tickerDf``, downloads the
    company's ``us-gaap`` facts, condenses them into ``incomeStatementDict``
    and builds the ``incomeStatement`` DataFrame.
    """

    # Sent as the User-Agent header of every SEC request made by this class.
    email = "anthonytaylor@ufl.edu"
    # Ticker table, downloaded once when the class body is executed.
    tickerDf = getTickers(email)
    # Seconds to wait for the SEC server before a request is abandoned.
    requestTimeout = 30

    # (line item, unit, regex patterns in priority order). The order of the
    # rows is the order of the keys in ``incomeStatementDict``.
    _incomeStatementPatterns = (
        ('Revenue', 'USD', (r"[Ss]ales[Rr]evenue[Nn]et",)),
        ('CostOfGoodsSold', 'USD', (r"[Cc]ost[Oo]f[Gg]oods[Ss]old",
                                    r'[Cc]ost[Oo]f[Rr]evenue',
                                    r'CostOfGoodsAndServicesSold')),
        ('GrossProfit', 'USD', (r"[Gg]ross[Pp]rofit.*",)),
        ('OperatingExpenses', 'USD', (r"[Oo]perating[Ee]xpenses",)),
        ('EBIT', 'USD', (r"[Ii]ncome[Ff]rom[Oo]perations*",)),
        ('PreTaxIncome', 'USD', (r"[Ii]ncome[Bb]efore[Tt]axes*",)),
        ('NetIncome', 'USD', (r"\b[Nn]et[Ii]ncome",)),
        ('EPS', 'USD/shares', (r"[Ee]arnings[Pp]er[Ss]hare[Bb]asic",)),
    )

    def __init__(self, ticker):
        """Load and condense the SEC data for ``ticker``."""
        self.ticker = ticker
        self.cik = self.findCik()
        self.rawCompanyData = self.findRawCompanyData()
        self.incomeStatementDict = self.rawDataToIncomeStatementDict()
        self.incomeStatement = self.formIncStateFromDict()

    def findCik(self):
        """
        Return the CIK stored in the first ``tickerDf`` row whose ``Ticker``
        equals ``self.ticker``.

        Raises
        ------
        IndexError
            If the ticker is not present in ``tickerDf``.
        """
        cikRow = self.tickerDf[self.tickerDf['Ticker'] == self.ticker]
        if cikRow.empty:
            # Same exception type as the bare ``iloc`` lookup, but with a
            # message that names the actual problem.
            raise IndexError(
                f"Ticker {self.ticker!r} was not found in the SEC ticker list"
            )
        return cikRow.iloc[0, 0]

    def getCik(self):
        """Return the CIK resolved for this company."""
        return self.cik

    def findRawCompanyData(self):
        """
        Download the company facts for ``self.cik`` and return the
        ``['facts']['us-gaap']`` part of the JSON answer.

        Raises
        ------
        requests.HTTPError
            If the server answers with an error status code.
        KeyError
            If the answer has no ``facts``/``us-gaap`` section.
        """
        response = requests.get(
            f'https://data.sec.gov/api/xbrl/companyfacts/CIK{self.cik}.json',
            headers={'User-Agent': self.email},
            timeout=self.requestTimeout)
        # Surface HTTP failures directly instead of as a JSON decode error.
        response.raise_for_status()
        return response.json()['facts']['us-gaap']

    def printRawCompanyDataKeys(self):
        """Print every concept name found in the raw company data."""
        print(self.rawCompanyData.keys())

    def searchRawCompanyDataKeys(self, keyword):
        """
        Return the raw-data concept names that match ``keyword``.

        Parameters
        ----------
        keyword : str
            Regular expression, matched case-insensitively anywhere in the
            concept name.

        Returns
        -------
        list of str
            Matching keys, in the order they appear in the raw data.
        """
        # The flag has to be given at compile time: ``re.search`` raises a
        # ValueError when flags are combined with a pre-compiled pattern.
        pattern = re.compile(keyword, re.IGNORECASE)
        return [key for key in self.rawCompanyData.keys() if pattern.search(key)]

    def _findFacts(self, patterns, unit):
        """
        Return the ``unit`` facts of the first concept matched by the first
        matching pattern, or ``None`` when nothing matches.
        """
        for pattern in patterns:
            regex = re.compile(pattern)
            for key, concept in self.rawCompanyData.items():
                # Concepts that do not report the requested unit are skipped
                # rather than aborting the whole search with a KeyError.
                if regex.search(key) and unit in concept.get('units', {}):
                    return concept['units'][unit]
        return None

    def rawDataToIncomeStatementDict(self):
        """
        Use regular expressions to build a condensed dictionary holding only
        income statement items.

        For every line item the patterns are tried in order and the search
        stops at the first match; items without any match are left out.

        Returns
        -------
        dict
            Line item name -> list of fact records taken from the raw data.
        """
        incomeStatementDict = {}
        for item, unit, patterns in self._incomeStatementPatterns:
            facts = self._findFacts(patterns, unit)
            if facts is not None:
                incomeStatementDict[item] = facts
        return incomeStatementDict

    def printIncomeStatementDictKeys(self):
        """Print the line items that were found for this company."""
        print(self.incomeStatementDict.keys())

    def printIncomeStatementDictKeys2(self):
        """Print the fact records stored under ``'Revenue'``."""
        print(self.incomeStatementDict['Revenue'])

    def formIncStateFromDict(self, item='CostOfGoodsSold'):
        """
        Build a one-row DataFrame from the facts of one line item.

        Parameters
        ----------
        item : str, optional
            Key of ``incomeStatementDict`` to tabulate
            (default ``'CostOfGoodsSold'``).

        Returns
        -------
        pandas.DataFrame
            One column per ``(start date, end date)`` pair holding the
            reported value; when a period occurs more than once the last
            record wins. Empty when ``item`` was not found.
        """
        dateFormat = "%Y-%m-%d"
        colDict = {}
        # ``.get`` keeps companies without this line item constructible.
        for fact in self.incomeStatementDict.get(item, []):
            if 'start' not in fact or 'end' not in fact:
                # A period needs both dates to be usable as a column label.
                continue
            start = datetime.strptime(fact['start'], dateFormat).date()
            end = datetime.strptime(fact['end'], dateFormat).date()
            colDict[(start, end)] = [fact['val']]
        return pd.DataFrame(colDict)

    def printIncState(self):
        """
        Return the income statement DataFrame.

        Also sets the pandas option ``display.max_rows`` to ``None``; the
        option is global and stays changed after the call.
        """
        pd.set_option('display.max_rows', None)
        return self.incomeStatement

In [10]:
# Build the Company for Apple; __init__ resolves the CIK and downloads the raw facts.
aapl = Company("AAPL")
# NOTE(review): the comment below looks like a leftover to-do about the
# (start, end) date tuples rather than a description of the next line — confirm.
# if the first value of the tuple doesn't exist, delete it
# Print which income statement line items were found.
aapl.printIncomeStatementDictKeys()

dict_keys(['Revenue', 'CostOfGoodsSold', 'GrossProfit', 'OperatingExpenses', 'NetIncome', 'EPS'])


In [11]:
# A second Company for the same ticker; constructing it runs the lookups in __init__ again.
company = Company("AAPL")
# Last expression of the cell: printIncState returns the DataFrame, which is what is displayed.
company.printIncState()

Unnamed: 0_level_0,2006-10-01,2007-09-30,2008-03-30,2007-09-30,2008-09-28,2008-09-28,2008-12-28,2008-09-28,2009-03-29,2008-09-28,...,2022-09-25,2023-01-01,2022-09-25,2023-04-02,2022-09-25,2023-10-01,2023-10-01,2023-12-31,2023-10-01,2024-03-31
Unnamed: 0_level_1,2007-09-29,2008-06-28,2008-06-28,2008-09-27,2008-12-27,2009-03-28,2009-03-28,2009-06-27,2009-06-27,2009-09-26,...,2023-04-01,2023-04-01,2023-07-01,2023-07-01,2023-09-30,2023-12-30,2024-03-30,2024-03-30,2024-06-29,2024-06-29
0,16426000000,16178000000,4864000000,24294000000,7373000000,12830000000,5457000000,18581000000,5751000000,25683000000,...,119682000000,52860000000,165066000000,45384000000,214137000000,64720000000,113202000000,48482000000,159301000000,46099000000


In [5]:
# NOTE(review): this cell's execution count (5) is out of sequence with the
# cells around it, so the notebook was not run top to bottom — re-check with
# Restart & Run All.
from dataframe import MyDataFrame
# Create an empty MyDataFrame instance.
df = MyDataFrame()

In [2]:
# Start from a fresh MyDataFrame and add two columns by item assignment.
df = MyDataFrame()
df["Age"] = [25, 30, 35]
df["City"] = ["New York", "Los Angeles", "Chicago"]

In [3]:
# Bare expression: the cell output is the object's own representation.
df

Age | City
----------
25 | New York
30 | Los Angeles
35 | Chicago