In [1]:
import requests as req
import pandas as pd
from bs4 import BeautifulSoup
import csv
import re
import itertools as iter
import copy
import time

In [2]:
def download_files():
    """
    Downloads all index files from EDGAR into the current directory.

    One 'company.idx' file is requested for every quarter of the years 1993 through 2018 and
    written to 'Index-<year>-<quarter>.txt' (the naming scheme get_reports reads back).

    Raises:
    requests.HTTPError: if EDGAR answers a request with a 4xx/5xx status

    """

    for year in range(1993, 2019):
        for quarter in range(1, 5):
            url = 'https://www.sec.gov/Archives/edgar/full-index/' + str(year) + '/QTR' + str(quarter) + '/company.idx'
            r = req.get(url, allow_redirects=True)
            # Fail loudly instead of saving an HTTP error page as if it were an index file;
            # such a file would later be scanned by get_reports without any warning.
            r.raise_for_status()
            with open('Index-' + str(year) + '-' + str(quarter) + '.txt', "wb") as f:
                f.write(r.content)

In [3]:
import os

def get_reports(company_name, startyear, startquarter, endyear, endquarter):
    """
    Reads the downloaded index files and returns a generator of URLs of the 10-Qs of the given company.

    Only files in the current directory named 'Index-<year>-<quarter>.txt' are read; any other
    file is ignored. Index files are visited in chronological order. At most one report is
    yielded per index file: the first line that starts with company_name (case-insensitive)
    and contains '10-Q '.

    Parameters:
    company_name (str): name of the company 
    startyear (int): the year to start looking for reports, inclusive
    startquarter (int): the quarter in startyear to start looking for reports, inclusive
    endyear (int): the year to stop looking for reports, inclusive
    endquarter(int): the quarter in endyear to stop looking for reports, inclusive

    Returns:
    Generator: each element is a tuple with the form (year of report, quarter of report, url of report)

    Raises:
    ValueError: if a matching line does not contain 'edgar/'

    """

    index_name = re.compile(r'Index-(\d{4})-(\d)\.txt')
    index_files = []
    for file in os.listdir('.'):
        match = index_name.fullmatch(file)
        if match:
            index_files.append((int(match.group(1)), int(match.group(2)), file))
    # os.listdir returns names in arbitrary order; sort by (year, quarter) so that callers
    # receive the reports chronologically.
    index_files.sort()

    prefix = company_name.lower()
    for year, quarter, file in index_files:
        if year < startyear or year > endyear:
            continue
        if (year == startyear and quarter < startquarter) or (year == endyear and quarter > endquarter):
            continue
        # The context manager closes the file even when the generator is abandoned early
        with open(file, 'r') as txt:
            for line in txt:
                if line.lower().startswith(prefix) and '10-Q ' in line:
                    url_index = line.index('edgar/')
                    url = line[url_index:].rstrip('\n').strip()
                    yield (year, quarter, 'https://www.sec.gov/Archives/' + url)
                    # Only the first matching filing of each quarter is reported
                    break

In [4]:
def make_tables(company_name, text, limit, search_terms, year, quarter, mapping, default_side, max_tables):
    """
    Parses HTML text and returns a DataFrame containing the desired data
    
    Every <tr> of every <table> is scanned. The row label is the text of the first non-empty
    cell with everything except ASCII letters and spaces removed. The first entry of
    search_terms that occurs in the label (case-sensitive substring match) decides which
    column the row's value is stored under, so the order of search_terms matters.
    
    Parameters:
    company_name (str): the name of the company
    text (str): the text of the company report
    limit (int): scanning stops once this many tables have each contributed at least one value
    search_terms (list of str): the terms to filter each table for. 
    year (int): the year the document corresponds to
    quarter(int): the quarter the document corresponds to
    mapping (dict): a dict with items of the form (term: [locations, amount])
        term (str): one of the search terms for which detailed location/amount information is required
        locations (list of int): location(s) of the targeted value in the row, as indices into the
            list of numeric cells found on that row
        amount (int): the number of times to read in values of this term; when it is greater than 1
            the column label gets a running number appended (term1, term2, ...)
    default_side (str): the default location of the number to keep on each row (left/right) if the label is not in mapping
    max_tables (int): cap on the tables examined; the counter is decremented before each table and
        scanning stops when it reaches 0, so at most max_tables - 1 tables are processed
    
    Returns:
    DataFrame: a single-row table, indexed by '<company_name> <year>-<quarter>', whose first two
        columns are 'Year' and 'Quarter' followed by one column per value that was found
    
    """
    # Matches every character that is not an ASCII letter or a space
    regex = re.compile('[^a-z A-Z]')
    # NOTE(review): no parser is named here, so bs4 chooses one itself — confirm results do not
    # depend on which parsers are installed.
    soup = BeautifulSoup(text)
    tables = soup.find_all("table")
    column_list = ['Year', 'Quarter']
    data_values = [year, quarter]
    found_terms = [] # By default, only the first occurence of a term in a row label will be used
    table_num = 0  # number of tables that contributed at least one value
    diluted_found = False
    equity_found = False
    # Work on a copy so the remaining-amount counters can be decremented without touching the caller's dict
    term_map = copy.deepcopy(mapping)
    
    for table in tables:
        #print(max_tables)
        max_tables -= 1
        if max_tables == 0:
            break
        added_rows = False
        for row in table.find_all('tr'):
            data_row = []
            columns = row.find_all('td')
            if(len(columns) == 0):
                continue
            #print([xc.get_text() for xc in columns])
            # The label is taken from the first cell that has any non-whitespace text
            first_column = ''
            for col in columns:
                if len(col.get_text().strip()) > 0:
                    first_column = regex.sub('', col.get_text()) # TODO: Does using regex break anything?
                    break
            #if len(column_list) > 2:
                #print(year, quarter, ':', repr(first_column))
            # Collapse runs of whitespace in the label to single spaces
            row_label_tokens = first_column.split()
            row_label = ' '.join(row_label_tokens)
            row_label = regex.sub('', row_label).strip()
            #if 'diluted' in row_label.lower():
                #print(year, quarter, ':', repr(row_label))
            # Only the first search term contained in the label is considered; every path below ends in a break
            for term in search_terms:
                if term in row_label:
                    #print(term)
                    # Terms without a mapping entry are recorded once per document
                    if term in found_terms and term not in term_map:
                        break
                    # A 'Net loss' row is ignored once a 'Net income' value has been recorded
                    if term == 'Net loss' and 'Net income' in column_list:
                        break
                    # Collect the numeric cells of the row; columns[0] is never considered
                    row_values = []
                    for i in range(1, len(columns)):
                        cell_text = columns[i].get_text()
                        if any(char.isdigit() for char in cell_text) and not re.search('[a-zA-Z]', cell_text): # TODO: does the second part break anything?
                            # NOTE(review): the '-' is prepended before any leading whitespace, so a cell
                            # such as ' (123)' becomes '- 123', which float() below would reject — confirm.
                            if cell_text.strip()[0] == '(': # TODO: Keep making numbers in parentheses negative?
                                cell_text = '-' + cell_text
                            for punctuation in [',', '(', ')', '\n']:
                                cell_text = cell_text.replace(punctuation, '')
                            row_values.append(cell_text)
                    if len(row_values) == 0: 
                        break
                    # Only the first row whose matched term contains 'diluted' is recorded per document
                    if 'diluted' in term.lower():
                        if diluted_found:
                            break
                        diluted_found = True
                    # Likewise, only the first row whose matched term contains 'equity' is recorded
                    if 'equity' in term.lower():
                        if equity_found:
                            break
                        equity_found = True
                    found_terms.append(term)
                    if term in term_map:
                        # term_map holds the number of reads still allowed; mapping holds the original amount
                        term_amount = term_map[term][1]
                        if term_amount == 0:
                            break
                        column_label = term
                        term_start_amount = mapping[term][1]
                        if term_start_amount > 1:
                            # Running number of this occurrence, starting at 1
                            column_label += str(term_start_amount - term_amount + 1)
                        positions = term_map[term][0]
                        for position in positions:
                            if len(positions) == 1:
                                column_list.append(column_label)
                            else:
                                column_list.append(column_label + ' [' + str(position) + ']')
                            data_values.append(float(row_values[position]))
                            #print(row_values[position])
                        term_map[term][1] -= 1
                    else:
                        column_list.append(term)
                        # Any default_side other than 'left' selects the right-most numeric cell
                        if default_side == 'left':
                            data_values.append(float(row_values[0]))
                        else:
                            data_values.append(float(row_values[-1]))
                        #print(row_label)
                    added_rows = True
                    break
        
        if added_rows:
            table_num += 1
        if table_num >= limit:
            break
    #print(data_values)
    #print(year, quarter, ':', column_list)
    return pd.DataFrame([data_values], index=[company_name + ' ' + str(year) + '-' + str(quarter)], columns=column_list)

In [5]:
def get_html_tables(company_name, search_terms, startyear, startquarter=1, endyear=2018, endquarter=4, 
                    limit=3, mapping=dict(), default_side='left', max_tables=17):
    """
    Builds one table of the requested search terms for every HTML report of the given company
    found in the given timeframe.

    Reports whose downloaded text contains neither '<html' nor '<HTML' are skipped.

    Parameters:
    See documentation of get_reports and make_tables.

    Returns:
    Generator: each element is a tuple with the form (year, quarter, amalgamated table)

    """

    for year, quarter, url in get_reports(company_name, startyear, startquarter, endyear, endquarter):
        text = req.get(url).text
        looks_like_html = '<html' in text or '<HTML' in text
        if not looks_like_html:
            continue
        table = make_tables(company_name, text, limit, search_terms, year, quarter, mapping, default_side, max_tables)
        yield (year, quarter, table)

In [34]:
def get_ti_data():
    """
    Collects the 2008-2018 quarterly figures of Texas Instruments into one DataFrame.

    Alternative row labels are folded into a single column name per metric, and a few values
    are overwritten by hand (see the comments below).

    Returns:
    DataFrame: one row per report, with the columns of the last report processed

    """
    searchterms_ti = ['Net revenue', 'Cost of revenue', 'Revenue', 'Net income', 'Diluted', 'Total current assets', 'Total assets', 
                      'Total current liabilities', 'Total liabilities', 'Total stockholders equity', 'Depreciation', 
                      'operating activities', 'Cash and cash equivalents at beginning', 'Cash and cash equivalents beginning', 
                      'Cash and cash equivalents at end', 'Cash and cash equivalents end']
    reports_ti = get_html_tables('Texas Instruments', startyear=2008, startquarter=1, endyear=2018, endquarter=4, 
                                            max_tables=20, limit=5, search_terms=searchterms_ti)

    combined_df = pd.DataFrame()

    for _, _, report_df in reports_ti:
        df = report_df.copy()
        new_columns = df.columns.tolist()
        if 'Net revenue' in df.columns:
            new_columns[new_columns.index('Net revenue')] = 'Revenue'
        if 'Cash and cash equivalents at beginning' in df.columns:
            new_columns[new_columns.index('Cash and cash equivalents at beginning')] = 'Cash and cash equivalents beginning'
        if 'Cash and cash equivalents at end' in df.columns:
            new_columns[new_columns.index('Cash and cash equivalents at end')] = 'Cash and cash equivalents end'
        df.columns = new_columns
        # DataFrame.append no longer exists in pandas 2.x; pd.concat is the equivalent call.
        # Re-selecting df's columns keeps the combined frame in this report's column order.
        combined_df = pd.concat([combined_df, df])[df.columns.tolist()]

    diluted_col = combined_df['Diluted'].tolist()
    # Manually inputting these values as it was just the 1st quarter with different formatting 
    # NOTE(review): the overrides below are positional, so they assume the first rows of
    # combined_df are the earliest reports — confirm against the order get_reports yields.
    diluted_col[0:3] = [0.49, 0.44, 0.43] 
    combined_df['Diluted'] = diluted_col
    cash_begin_col = combined_df['Cash and cash equivalents beginning'].tolist()
    cash_begin_col[0] = 1328
    cash_end_col = combined_df['Cash and cash equivalents end'].tolist()
    cash_end_col[0] = 1450
    combined_df['Cash and cash equivalents beginning'] = cash_begin_col
    combined_df['Cash and cash equivalents end'] = cash_end_col
    return combined_df

In [39]:
# Scratch values: a one-element list of company names and a small 3x3 frame for experiments
other_companies = ['Nvidia']

testdf = pd.DataFrame({'a': [1, 4, 7], 'b': [2, 5, 8], 'c': [3, 6, 9]})


In [152]:
def get_amd_data():
    """
    Collects the 2008-2018 quarterly figures of Advanced Micro Devices into one DataFrame.

    Alternative row labels (per-share wording, 'Net loss', restricted-cash wording) are folded
    into a single column name per metric before the per-report tables are stacked.

    Returns:
    DataFrame: one row per report, with the columns of the last report processed

    """
    searchterms_amd = ['Net revenue', 'Cost of sales', 'Net income loss per common share', 'Net income', 'Net loss', 'Diluted',  
                       'diluted', 'Total current assets', 'Total assets', 'Total current liabilities', 
                       'Total stockholders equity', 'Total liability and stockholders equity', 
                       'Depreciation', 'operating activities', 'cash equivalents at beginning', 
                       'Cash cash equivalents and restricted cash at beginning', 'cash equivalents at end', 
                       'Cash cash equivalents and restricted cash at end']
    reports_amd = get_html_tables('Advanced Micro Devices', startyear=2008, startquarter=1, endyear=2018, endquarter=4, 
                                        max_tables=20, limit=5, search_terms=searchterms_amd)

    combined_df = pd.DataFrame()

    for _, _, report_df in reports_amd:
        df = report_df.copy()
        new_columns = df.columns.tolist()
        if 'Net income loss per common share' in df.columns:
            if 'diluted' in df.columns:
                # The per-share row takes precedence over a separate lower-case 'diluted' row
                df = df.drop('diluted', axis=1)
                new_columns = df.columns.tolist()
            # The original looked up 'per common share' when no 'diluted' column existed; that
            # is never a column name, so the lookup raised ValueError. The column to rename
            # is the same in both cases.
            new_columns[new_columns.index('Net income loss per common share')] = 'Diluted'
        if 'diluted' in df.columns and not 'Diluted' in df.columns:
            new_columns[new_columns.index('diluted')] = 'Diluted'
        if 'Net loss' in df.columns and not 'Net income' in df.columns:
            new_columns[new_columns.index('Net loss')] = 'Net income'
        if 'Cash cash equivalents and restricted cash at beginning' in df.columns:
            new_columns[new_columns.index('Cash cash equivalents and restricted cash at beginning')] = 'cash equivalents at beginning'
        if 'Cash cash equivalents and restricted cash at end' in df.columns:
            new_columns[new_columns.index('Cash cash equivalents and restricted cash at end')] = 'cash equivalents at end'
        df.columns = new_columns

        # DataFrame.append no longer exists in pandas 2.x; pd.concat is the equivalent call.
        # Re-selecting df's columns keeps the combined frame in this report's column order.
        combined_df = pd.concat([combined_df, df])[df.columns.tolist()]

    return combined_df

In [256]:
def get_intel_data():
    """
    Collects the 2008-2018 quarterly figures of Intel into one DataFrame.

    The 2018 reports are scanned with a larger max_tables than the earlier ones. Whichever
    diluted-EPS label a report uses is renamed to 'Diluted earnings per share'.

    Returns:
    DataFrame: one row per report

    """
    searchterms_intel = ['Net revenue', 'Cost of sales', 'Operating expenses', 'Net income', 'Diluted earnings', 
                         'Total current assets', 'Total assets', 'Total current liabilities', 'Total stockholders equity', 
                         'Cash and cash equivalents beginning', 'Net cash provided by operating activities', 'Depreciation', 
                         'Cash and cash equivalents end', 'Earnings per share Diluted']
    reports_intel1 = get_html_tables('Intel Corp', startyear=2008, startquarter=1, endyear=2017, endquarter=4, 
                                    search_terms=searchterms_intel)
    reports_intel2 = get_html_tables('Intel Corp', startyear=2018, startquarter=1, endyear=2018, endquarter=4, 
                                    search_terms=searchterms_intel, max_tables=40)
    reports_intel = iter.chain(reports_intel1, reports_intel2)

    combined_df = pd.DataFrame()

    for _, _, report_df in reports_intel:
        df = report_df.copy()
        new_columns = df.columns.tolist()
        if 'Earnings per share Diluted' in df.columns:
            new_columns[new_columns.index('Earnings per share Diluted')] = 'Diluted earnings per share'
        elif 'Diluted earnings' in df.columns:
            # Guarded like the renames in the other get_*_data functions: a report in which
            # neither label was found used to raise ValueError here; now its EPS stays missing.
            new_columns[new_columns.index('Diluted earnings')] = 'Diluted earnings per share'
        df.columns = new_columns
        # DataFrame.append no longer exists in pandas 2.x; pd.concat is the equivalent call
        combined_df = pd.concat([combined_df, df])

    return combined_df

In [111]:
def get_cisco_data():
    """
    Collects the 2008-2018 quarterly figures of Cisco Systems into one DataFrame.

    Alternative row labels (equity, diluted EPS, revenue and restricted-cash wording) are
    folded into a single column name per metric before the per-report tables are stacked.

    Returns:
    DataFrame: one row per report, with the columns of the last report processed

    """
    searchterms_cisco = ['Total net sales', 'Total revenue', 'Total cost of sales', 'Total operating expenses', 'NET INCOME', 
                         'Net income per share diluted', 'Diluted', 'Total current assets', 'TOTAL ASSETS', 'Total current liabilities', 
                         'Total shareholders equity', 'Total equity', 'Total Cisco shareholders equity', 'Depreciation', 
                         'Net cash provided by operating activities', 'Cash and cash equivalents beginning', 
                         'Cash and cash equivalents end', 'Cash cash equivalents and restricted cash beginning', 
                         'Cash cash equivalents and restricted cash end']
    reports_cisco = get_html_tables('Cisco Systems', startyear=2008, startquarter=1, endyear=2018, endquarter=4, 
                                         search_terms=searchterms_cisco)

    combined_df = pd.DataFrame()

    for _, _, report_df in reports_cisco:
        df = report_df.copy()
        new_columns = df.columns.tolist()
        if 'Total Cisco shareholders equity' in df.columns:
            new_columns[new_columns.index('Total Cisco shareholders equity')] = 'Total shareholders equity'
        if 'Total equity' in df.columns:
            new_columns[new_columns.index('Total equity')] = 'Total shareholders equity'
        if 'Diluted' in df.columns:
            new_columns[new_columns.index('Diluted')] = 'Net income per share diluted'
        if 'Total revenue' in df.columns:
            new_columns[new_columns.index('Total revenue')] = 'Total net sales'
        if 'Cash cash equivalents and restricted cash beginning' in df.columns:
            new_columns[new_columns.index('Cash cash equivalents and restricted cash beginning')] = 'Cash and cash equivalents beginning'
        if 'Cash cash equivalents and restricted cash end' in df.columns:
            new_columns[new_columns.index('Cash cash equivalents and restricted cash end')] = 'Cash and cash equivalents end'
        df.columns = new_columns

        # DataFrame.append no longer exists in pandas 2.x; pd.concat is the equivalent call.
        # Re-selecting df's columns keeps the combined frame in this report's column order
        # and drops any column this report does not have.
        combined_df = pd.concat([combined_df, df])[df.columns.tolist()]

    return combined_df

In [45]:
def get_amazon_data():
    """
    Collects the 2008-2018 quarterly figures of Amazon into one DataFrame.

    'Total net sales' and the restricted-cash wording of the cash rows are renamed to the
    labels used by the other reports before the per-report tables are stacked.

    Returns:
    DataFrame: one row per report

    """
    searchterms_amazon = ['CASH AND CASH EQUIVALENTS BEGINNING', 'CASH CASH EQUIVALENTS AND RESTRICTED CASH BEGINNING', 
                          'Net income', 'Depreciation', 'Net cash provided by' ,'CASH AND CASH EQUIVALENTS END', 
                          'CASH CASH EQUIVALENTS AND RESTRICTED CASH END', 'Net sales', 'Total net sales', 'Cost of sales', 
                          'Total operating expenses', 'Diluted earnings per share', 'Total current assets', 'Total assets', 
                          'Total current liabilities', 'Total stockholders equity']
    reports_amazon = get_html_tables('Amazon com Inc', startyear=2008, startquarter=1, endyear=2018, endquarter=4, 
                                     search_terms=searchterms_amazon, limit=4)

    combined_df = pd.DataFrame()

    for _, _, report_df in reports_amazon:
        df = report_df.copy()
        new_columns = df.columns.tolist()
        if 'Total net sales' in new_columns:
            new_columns[new_columns.index('Total net sales')] = 'Net sales'
        if 'CASH CASH EQUIVALENTS AND RESTRICTED CASH BEGINNING' in new_columns:
            new_columns[new_columns.index('CASH CASH EQUIVALENTS AND RESTRICTED CASH BEGINNING')] = 'CASH AND CASH EQUIVALENTS BEGINNING'
        if 'CASH CASH EQUIVALENTS AND RESTRICTED CASH END' in new_columns:
            new_columns[new_columns.index('CASH CASH EQUIVALENTS AND RESTRICTED CASH END')] = 'CASH AND CASH EQUIVALENTS END'
        df.columns = new_columns
        # DataFrame.append no longer exists in pandas 2.x; pd.concat is the equivalent call
        combined_df = pd.concat([combined_df, df])

    return combined_df

In [7]:
def get_ibm_data():    
    """
    Collects the 2008-2018 quarterly figures of IBM into one DataFrame.

    The two cash rows are read through a mapping (first numeric cell, two occurrences each),
    which yields the columns '...at1' and '...at2'; these are renamed to beginning/end at the
    very end. For 2008 reports the 'Assuming dilution' value is replaced by
    'Net income' divided by it, rounded to 2 decimals.

    Returns:
    DataFrame: one row per report

    """
    searchterms_ibm = ['Total revenue', 'Total cost', 'Total expense and other income', 'Net income', 'Assuming dilution', 
                       'Total current assets', 'Total assets', 'Total current liabilities', 'Total liabilities', 
                       'Total stockholders equity', 'Total IBM stockholders equity', 'Depreciation', 
                       'Net cash provided by operating activities', 'Cash and cash equivalents at', 
                       'Cash cash equivalents and restricted cash at']
    mappings_ibm = {'Cash and cash equivalents at': [[0], 2], 'Cash cash equivalents and restricted cash at': [[0], 2]}
    reports_ibm = get_html_tables('International Business Machines', startyear=2008, startquarter=1, endyear=2018, endquarter=4, 
                                  search_terms=searchterms_ibm, limit=5, mapping=mappings_ibm)

    combined_df = pd.DataFrame()

    for year, _, report_df in reports_ibm:
        df = report_df.copy()
        if year == 2008:
            # NOTE(review): presumably the 2008 reports list a diluted share count on this row
            # rather than earnings per share — confirm against the filings.
            df['Assuming dilution'] = round(df['Net income'] / df['Assuming dilution'], 2)
        if 'Total IBM stockholders equity' in df.columns:
            if 'Total stockholders equity' in df.columns:
                df = df.drop('Total IBM stockholders equity', axis=1)
            else:
                new_columns = df.columns.tolist()
                new_columns[new_columns.index('Total IBM stockholders equity')] = 'Total stockholders equity'
                df.columns = new_columns
        new_columns = df.columns.tolist()
        if 'Cash cash equivalents and restricted cash at1' in new_columns:
            new_columns[new_columns.index('Cash cash equivalents and restricted cash at1')] = 'Cash and cash equivalents at1'
        if 'Cash cash equivalents and restricted cash at2' in new_columns:
            new_columns[new_columns.index('Cash cash equivalents and restricted cash at2')] = 'Cash and cash equivalents at2'
        df.columns = new_columns
        # DataFrame.append no longer exists in pandas 2.x; pd.concat is the equivalent call
        combined_df = pd.concat([combined_df, df])

    new_columns = ['Earnings/diluted share' if col == 'Assuming dilution' else col for col in combined_df.columns.tolist()]
    new_columns = ['Cash and cash equivalents beginning' if col == 'Cash and cash equivalents at1' else col for col in new_columns]
    new_columns = ['Cash and cash equivalents end' if col == 'Cash and cash equivalents at2' else col for col in new_columns]
    combined_df.columns = new_columns
    return combined_df

In [225]:
def get_google_data():
    """
    Collects the quarterly figures of Google (2008-2015) and Alphabet (2016-2018) into one DataFrame.

    Income-statement terms are read from the second numeric cell of their row via the mapping;
    all other terms use the right-most numeric cell. Values of reports before 2010 are divided
    by 1000 and rounded, except the per-share figure.

    Returns:
    DataFrame: one row per report, with 'share diluted' renamed to 'Earnings/diluted share'

    """
    searchterms_google = ['Revenues', 'Google advertising and other', 'Cost of revenues', 'Total costs and expenses', 
                           'stock diluted', 'share diluted', 'Diluted', 'Net income', 'Total current assets', 
                          'Total assets', 'Total current liabilities', 'Total stockholders equity', 'Depreciation', 
                           'Net cash provided by operating activities', 'Cash and cash equivalents at beginning', 
                          'Cash and cash equivalents at end']
    mappings_google = {'Revenues': [[1], 1], 'Google advertising and other': [[1], 1], 'stock diluted': [[1], 1],
                       'share diluted': [[1], 1], 'Cost of revenues': [[1], 1], 'Total costs and expenses': [[1], 1], 
                       'Net income': [[1], 1], 'Diluted': [[1], 1]}
    reports_google = get_html_tables('Google Inc', startyear=2008, startquarter=1, endyear=2015, endquarter=4, 
                                      mapping=mappings_google, search_terms=searchterms_google, default_side='right', 
                                     max_tables=50)
    reports_alphabet = get_html_tables('Alphabet Inc', startyear=2016, startquarter=1, endyear=2018, endquarter=4, 
                                      mapping=mappings_google, search_terms=searchterms_google, default_side='right', 
                                       max_tables=90)
    reports_google_combined = iter.chain(reports_google, reports_alphabet)

    combined_df = pd.DataFrame()

    for year, _, report_df in reports_google_combined:
        df = report_df.copy()
        if 'Google advertising and other' in df.columns:
            new_columns = df.columns.tolist()
            new_columns[new_columns.index('Google advertising and other')] = 'Revenues'
            df.columns = new_columns
        if 'Diluted' in df.columns:
            new_columns = df.columns.tolist()
            new_columns[new_columns.index('Diluted')] = 'share diluted'
            df.columns = new_columns
        if 'stock diluted' in df.columns:
            new_columns = df.columns.tolist()
            new_columns[new_columns.index('stock diluted')] = 'share diluted'
            df.columns = new_columns
        if year < 2010: # After 2010, Google changed from counting in thousands to counting in millions
            for col in df.columns:
                # Year, Quarter and the per-share figure are not monetary totals, so they are not rescaled
                if col.lower() != 'share diluted' and col.lower() != 'year' and col.lower() != 'quarter':
                    df[col] = round(df[col] / 1000)
        # DataFrame.append no longer exists in pandas 2.x; pd.concat is the equivalent call
        combined_df = pd.concat([combined_df, df])

    new_columns = ['Earnings/diluted share' if col == 'share diluted' else col for col in combined_df.columns.tolist()]
    combined_df.columns = new_columns
    return combined_df

In [139]:
def get_apple_data():
    """
    Collects the tables extracted from Apple's reports into one DataFrame.

    Calls get_html_tables for 'Apple Inc' from 2008 Q1 through 2018 Q4 with the search terms and
    mapping defined below, stacks the DataFrame carried by every yielded report (element 2 of each
    report tuple) and gives the two 'Diluted' columns descriptive names.

    Returns:
    DataFrame: the rows of all reports stacked in the order they were yielded, with 'Diluted1' renamed
    to 'Earnings/diluted share' and 'Diluted2' renamed to 'Number of diluted shares (thousands)'.
    An empty DataFrame is returned when get_html_tables yields no reports.

    """
    searchterms_apple = ['Net sales', 'Cost of sales', 'Total operating expenses', 'Net income', 'Diluted',
                         'Total current assets', 'Total assets', 'Total current liabilities', 'Total liabilities',
                         'Total shareholders equity', 'Cash and cash equivalents beginning of the period',
                         'Depreciation', 'Cash generated by operating activities',
                         'Cash and cash equivalents end of the period']
    # NOTE(review): the exact meaning of the mapping value is defined by get_html_tables; presumably it
    # tells the parser that 'Diluted' occurs twice (per-share value and share count) -- confirm there.
    mappings_apple = {'Diluted': [[0], 2]}  # TODO: Get rid of number of diluted shares
    reports_apple = get_html_tables('Apple Inc', startyear=2008, startquarter=1, endyear=2018, endquarter=4,
                                    mapping=mappings_apple, search_terms=searchterms_apple)

    # Gather every report's table first and concatenate once: DataFrame.append was removed in
    # pandas 2.0, and appending inside the loop re-copied the accumulated frame on every iteration.
    frames = [report[2] for report in reports_apple]

    # pd.concat raises ValueError on an empty list, so keep the previous "empty frame" result explicitly.
    if frames:
        combined_df = pd.concat(frames, sort=False)
    else:
        combined_df = pd.DataFrame()

    return combined_df.rename(columns={'Diluted1': 'Earnings/diluted share',
                                       'Diluted2': 'Number of diluted shares (thousands)'})

In [38]:
# Relabel the Texas Instruments frame by position: period, income statement,
# balance sheet, then cash-flow items.
ti_df.columns = [
    'Year', 'Quarter',
    'Revenue', 'Cost of revenue', 'Net income', 'Earnings per share',
    'Total current assets', 'Total assets', 'Total current liabilities', 'Total liabilities',
    "Shareholders' equity",
    'Depreciation & Amortization', 'Cash generated from operating activities',
    'Cash at beginning of period', 'Cash at end of period',
]
display(ti_df)

Unnamed: 0,Year,Quarter,Revenue,Cost of revenue,Net income,Earnings per share,Total current assets,Total assets,Total current liabilities,Total liabilities,Shareholders' equity,Depreciation & Amortization,Cash generated from operating activities,Cash at beginning of period,Cash at end of period
Texas Instruments 2008-2,2008,2,3272.0,1516.0,662.0,0.49,5975.0,12338.0,1848.0,2481.0,9857.0,241.0,641.0,1328.0,1450.0
Texas Instruments 2008-3,2008,3,3351.0,1602.0,588.0,0.44,6010.0,12382.0,1760.0,2398.0,9984.0,487.0,1160.0,1328.0,1317.0
Texas Instruments 2008-4,2008,4,3387.0,1744.0,563.0,0.43,6212.0,12466.0,1738.0,2372.0,10094.0,738.0,2212.0,1328.0,1715.0
Texas Instruments 2009-2,2009,2,2086.0,1280.0,17.0,0.01,5532.0,11520.0,1287.0,2310.0,9210.0,230.0,251.0,1046.0,1436.0
Texas Instruments 2009-3,2009,3,2457.0,1333.0,260.0,0.2,5740.0,11510.0,1468.0,2297.0,9213.0,451.0,808.0,1046.0,1765.0
Texas Instruments 2009-4,2009,4,2880.0,1399.0,538.0,0.42,6138.0,11903.0,1662.0,2465.0,9438.0,668.0,1642.0,1046.0,1294.0
Texas Instruments 2010-2,2010,2,3205.0,1516.0,658.0,0.52,6323.0,12370.0,1719.0,2565.0,9805.0,211.0,710.0,1182.0,1217.0
Texas Instruments 2010-3,2010,3,3496.0,1602.0,769.0,0.62,6130.0,12164.0,1538.0,2409.0,9755.0,426.0,1272.0,1182.0,1138.0
Texas Instruments 2010-4,2010,4,3740.0,1701.0,859.0,0.71,6468.0,12649.0,1838.0,2687.0,9962.0,639.0,2590.0,1182.0,1093.0
Texas Instruments 2011-2,2011,2,3392.0,1664.0,666.0,0.55,7044.0,13310.0,1793.0,2736.0,10574.0,224.0,516.0,1319.0,1343.0


In [156]:
# Relabel the AMD frame by position: period, income statement,
# balance sheet, then cash-flow items.
amd_df.columns = [
    'Year', 'Quarter',
    'Revenue', 'Cost of sales', 'Net income', 'Net income per share',
    'Total current assets', 'Total assets', 'Total current liabilities',
    "Total shareholders' equity",
    'Depreciation & Amortization', 'Cash generated by operating activities',
    'Cash at beginning of period', 'Cash at end of period',
]
display(amd_df)

Unnamed: 0,Year,Quarter,Revenue,Cost of sales,Net income,Net income per share,Total current assets,Total assets,Total current liabilities,Total shareholders' equity,Depreciation & Amortization,Cash generated by operating activities,Cash at beginning of period,Cash at end of period
Advanced Micro Devices 2008-2,2008,2,1505.0,877.0,-358.0,-0.59,3513.0,11208.0,2738.0,2637.0,317.0,16.0,1432.0,1404.0
Advanced Micro Devices 2008-3,2008,3,1349.0,653.0,-1189.0,-1.96,3431.0,9784.0,2468.0,1474.0,627.0,-226.0,1432.0,1310.0
Advanced Micro Devices 2008-4,2008,4,1776.0,871.0,-127.0,-0.21,3297.0,9441.0,2392.0,1339.0,920.0,-442.0,1432.0,1108.0
Advanced Micro Devices 2009-2,2009,2,1177.0,666.0,-414.0,-0.66,3971.0,9052.0,2079.0,-163.0,280.0,-391.0,933.0,2402.0
Advanced Micro Devices 2009-3,2009,3,1184.0,743.0,-335.0,-0.49,3713.0,8683.0,2022.0,-465.0,562.0,-535.0,933.0,1979.0
Advanced Micro Devices 2009-4,2009,4,1396.0,811.0,-135.0,-0.18,3918.0,8747.0,2076.0,-569.0,844.0,-677.0,933.0,1847.0
Advanced Micro Devices 2010-2,2010,2,1574.0,833.0,257.0,0.35,3331.0,5232.0,1645.0,796.0,100.0,23.0,1657.0,642.0
Advanced Micro Devices 2010-3,2010,3,1653.0,915.0,-43.0,-0.06,3313.0,4955.0,1630.0,752.0,200.0,-75.0,1657.0,1084.0
Advanced Micro Devices 2010-4,2010,4,1618.0,879.0,-118.0,-0.17,3212.0,4595.0,1665.0,614.0,294.0,-199.0,1657.0,620.0
Advanced Micro Devices 2011-2,2011,2,1613.0,922.0,510.0,0.68,3411.0,5209.0,1375.0,1558.0,88.0,-168.0,606.0,602.0


In [260]:
# Relabel the Intel frame by position: period, income statement,
# balance sheet, then cash-flow items.
intel_df.columns = [
    'Year', 'Quarter',
    'Revenue', 'Cost of sales', 'Operating expenses', 'Net income', 'Earnings per share',
    'Current assets', 'Total assets', 'Current liabilities',
    "Stockholders' equity",
    'Cash at beginning of period', 'Depreciation & Amortization',
    'Cash generated by operating activities', 'Cash at end of period',
]
display(intel_df)

Unnamed: 0,Year,Quarter,Revenue,Cost of sales,Operating expenses,Net income,Earnings per share,Current assets,Total assets,Current liabilities,Stockholders' equity,Cash at beginning of period,Depreciation & Amortization,Cash generated by operating activities,Cash at end of period
Intel Corp 2008-2,2008,2,9673.0,4466.0,3145.0,1443.0,0.25,22064.0,53387.0,8668.0,40660.0,7307.0,1102.0,2215.0,5883.0
Intel Corp 2008-3,2008,3,9470.0,4221.0,2994.0,1601.0,0.28,19778.0,52392.0,8032.0,40361.0,7307.0,2144.0,5043.0,4079.0
Intel Corp 2008-4,2008,4,10217.0,4198.0,2921.0,2014.0,0.35,21423.0,52719.0,10068.0,38911.0,7307.0,3203.0,8330.0,3704.0
Intel Corp 2009-2,2009,2,7145.0,3907.0,2591.0,629.0,0.11,18142.0,48454.0,6341.0,39064.0,3350.0,1208.0,378.0,3536.0
Intel Corp 2009-3,2009,3,8024.0,3945.0,4091.0,-398.0,-0.07,18467.0,49061.0,7079.0,39047.0,3350.0,2419.0,3762.0,3826.0
Intel Corp 2009-4,2009,4,9389.0,3985.0,2825.0,1856.0,0.33,19247.0,50996.0,7749.0,39033.0,3350.0,3572.0,7765.0,4109.0
Intel Corp 2010-2,2010,2,10299.0,3770.0,3081.0,2442.0,0.43,23724.0,55773.0,8912.0,42900.0,3987.0,1080.0,4079.0,4988.0
Intel Corp 2010-3,2010,3,10765.0,3530.0,3254.0,2887.0,0.51,26464.0,57691.0,7937.0,45841.0,3987.0,2166.0,7565.0,5514.0
Intel Corp 2010-4,2010,4,11102.0,3781.0,3185.0,2955.0,0.52,29499.0,60588.0,8837.0,47696.0,3987.0,3252.0,11142.0,5517.0
Intel Corp 2011-2,2011,2,12847.0,4962.0,3727.0,3160.0,0.56,22795.0,65552.0,11565.0,47349.0,5498.0,1287.0,4013.0,4188.0


In [249]:
# Relabel the Cisco frame by position: period, balance sheet,
# income statement, then cash-flow items.
cisco_df.columns = [
    'Year', 'Quarter',
    'Current assets', 'Total assets', 'Current liabilities',
    "Shareholders' equity",
    'Net sales', 'Cost of sales', 'Operating expenses', 'Net income', 'Earnings per share',
    'Depreciation & Amortization', 'Cash generated by operating activities',
    'Cash at beginning of period', 'Cash at end of period',
]
display(cisco_df)

Unnamed: 0,Year,Quarter,Current assets,Total assets,Current liabilities,Shareholders' equity,Net sales,Cost of sales,Operating expenses,Net income,Earnings per share,Depreciation & Amortization,Cash generated by operating activities,Cash at beginning of period,Cash at end of period
Cisco Systems 2008-1,2008,1,32442.0,55300.0,12193.0,32812.0,9831.0,3491.0,3936.0,2060.0,0.33,878.0,5532.0,3728.0,5202.0
Cisco Systems 2008-2,2008,2,34145.0,57123.0,13359.0,33138.0,9791.0,3486.0,4164.0,1773.0,0.29,1314.0,8560.0,3728.0,6154.0
Cisco Systems 2008-4,2008,4,35662.0,58887.0,13548.0,35035.0,10331.0,3650.0,4199.0,2201.0,0.37,393.0,2718.0,5191.0,4197.0
Cisco Systems 2009-1,2009,1,37995.0,61357.0,13604.0,36783.0,9089.0,3366.0,3950.0,1504.0,0.26,818.0,5916.0,5191.0,4175.0
Cisco Systems 2009-2,2009,2,41336.0,64666.0,12861.0,37256.0,8162.0,2933.0,3622.0,1348.0,0.23,1244.0,7912.0,5191.0,7359.0
Cisco Systems 2009-4,2009,4,44697.0,68680.0,13162.0,40002.0,9021.0,3133.0,3764.0,1787.0,0.3,429.0,1488.0,5718.0,4774.0
Cisco Systems 2010-1,2010,1,50139.0,76403.0,14405.0,41512.0,9815.0,3483.0,3962.0,1853.0,0.32,942.0,3974.0,5718.0,4710.0
Cisco Systems 2010-2,2010,2,49758.0,79292.0,18439.0,43830.0,10368.0,3738.0,4285.0,2192.0,0.37,1415.0,6941.0,5718.0,3961.0
Cisco Systems 2010-4,2010,4,50406.0,80015.0,18120.0,44675.0,10750.0,3995.0,4404.0,1930.0,0.34,553.0,1667.0,4581.0,3796.0
Cisco Systems 2011-1,2011,1,52066.0,81981.0,18505.0,45641.0,10407.0,4146.0,4577.0,1521.0,0.27,1240.0,4276.0,4581.0,4924.0


In [245]:
# Relabel the Amazon frame by position: period, cash-flow items,
# income statement, then balance sheet.
amazon_df.columns = [
    'Year', 'Quarter',
    'Cash at beginning of period', 'Net income', 'Depreciation & Amortization',
    'Cash generated by operating activities', 'Cash at end of period',
    'Net sales', 'Cost of sales', 'Operating expenses', 'Earnings per share',
    'Current assets', 'Total assets', 'Current liabilities',
    "Shareholders' equity",
]
display(amazon_df)

Unnamed: 0,Year,Quarter,Cash at beginning of period,Net income,Depreciation & Amortization,Cash generated by operating activities,Cash at end of period,Net sales,Cost of sales,Operating expenses,Earnings per share,Current assets,Total assets,Current liabilities,Shareholders' equity
Amazon com Inc 2008-2,2008,2,2539.0,143.0,65.0,-645.0,1496.0,4135.0,3179.0,758.0,0.34,3965.0,5883.0,3551.0,1470.0
Amazon com Inc 2008-3,2008,3,1496.0,158.0,70.0,347.0,1548.0,4063.0,3096.0,750.0,0.37,4236.0,6322.0,3216.0,2230.0
Amazon com Inc 2008-4,2008,4,1548.0,118.0,76.0,424.0,1650.0,4264.0,3265.0,845.0,0.27,4430.0,6566.0,3144.0,2527.0
Amazon com Inc 2009-2,2009,2,2769.0,177.0,87.0,-585.0,1701.0,4889.0,3741.0,904.0,0.41,4775.0,6980.0,3410.0,2918.0
Amazon com Inc 2009-3,2009,3,1701.0,142.0,84.0,468.0,1936.0,4651.0,3518.0,974.0,0.32,5304.0,7675.0,3636.0,3256.0
Amazon com Inc 2009-4,2009,4,1936.0,199.0,96.0,799.0,2514.0,5449.0,4176.0,1022.0,0.45,6369.0,8972.0,4537.0,3585.0
Amazon com Inc 2010-2,2010,2,3444.0,299.0,119.0,-1098.0,1844.0,7131.0,5501.0,6737.0,0.66,7964.0,12042.0,5193.0,5618.0
Amazon com Inc 2010-3,2010,3,1844.0,207.0,129.0,250.0,1629.0,6566.0,4957.0,6296.0,0.45,8118.0,12397.0,5250.0,5857.0
Amazon com Inc 2010-4,2010,4,1629.0,231.0,150.0,855.0,1539.0,7560.0,5786.0,7292.0,0.51,9559.0,14162.0,6375.0,6397.0
Amazon com Inc 2011-2,2011,2,3777.0,201.0,202.0,-1586.0,2641.0,9857.0,7608.0,9535.0,0.44,11288.0,16882.0,7730.0,7347.0


In [233]:
# Relabel the IBM frame by position: period, income statement,
# balance sheet, then cash-flow items.
ibm_df.columns = [
    'Year', 'Quarter',
    'Revenue', 'Cost of revenue', 'Operating expenses', 'Net income', 'Earnings per share',
    'Current assets', 'Total assets', 'Current liabilities', 'Total liabilities',
    "Shareholders' equity",
    'Depreciation & Amortization', 'Cash generated by operating activities',
    'Cash at beginning of period', 'Cash at end of period',
]
display(ibm_df)

Unnamed: 0,Year,Quarter,Revenue,Cost of revenue,Operating expenses,Net income,Earnings per share,Current assets,Total assets,Current liabilities,Total liabilities,Shareholders' equity,Depreciation & Amortization,Cash generated by operating activities,Cash at beginning of period,Cash at end of period
International Business Machines 2008-2,2008,2,24502.0,14336.0,6968.0,2319.0,1.65,48425.0,121823.0,47048.0,93095.0,28728.0,1030.0,4202.0,14991.0,10786.0
International Business Machines 2008-3,2008,3,26820.0,15221.0,7786.0,2765.0,1.98,46312.0,120928.0,44683.0,92663.0,28264.0,2096.0,8453.0,14991.0,9626.0
International Business Machines 2008-4,2008,4,25302.0,14342.0,7064.0,2824.0,2.05,44187.0,115910.0,44752.0,88391.0,27519.0,3144.0,12191.0,14991.0,9755.0
International Business Machines 2009-2,2009,2,21711.0,12280.0,6309.0,2295.0,1.7,43995.0,101944.0,37425.0,88252.0,13693.0,917.0,4386.0,12741.0,12294.0
International Business Machines 2009-3,2009,3,23250.0,12669.0,6319.0,3103.0,2.32,44435.0,103655.0,36430.0,88182.0,15380.0,1853.0,9127.0,12741.0,11678.0
International Business Machines 2009-4,2009,4,23566.0,12938.0,6255.0,3214.0,2.4,43446.0,103675.0,32960.0,85182.0,18396.0,2806.0,14325.0,12741.0,9532.0
International Business Machines 2010-2,2010,2,22857.0,12880.0,6462.0,2601.0,1.97,45697.0,105208.0,34575.0,83059.0,22033.0,924.0,4437.0,12183.0,12472.0
International Business Machines 2010-3,2010,3,23724.0,12915.0,6234.0,3386.0,2.61,44895.0,103420.0,34993.0,82244.0,21059.0,1823.0,8203.0,12183.0,10325.0
International Business Machines 2010-4,2010,4,24271.0,13270.0,6324.0,3589.0,2.82,44469.0,107174.0,35443.0,84815.0,22238.0,2737.0,12754.0,12183.0,9859.0
International Business Machines 2011-2,2011,2,24607.0,13749.0,7041.0,2863.0,2.31,47524.0,112960.0,40387.0,90185.0,22671.0,918.0,3792.0,10661.0,12763.0


In [232]:
# Relabel the Google frame by position: period, balance sheet,
# income statement, then cash-flow items.
google_df.columns = [
    'Year', 'Quarter',
    'Current assets', 'Total assets', 'Current liabilities',
    "Shareholders' equity",
    'Revenue', 'Cost of revenue', 'Operating expenses', 'Net income', 'Earnings per share',
    'Depreciation & Amortization', 'Cash generated by operating activities',
    'Cash at beginning of period', 'Cash at end of period',
]
display(google_df)

Unnamed: 0,Year,Quarter,Current assets,Total assets,Current liabilities,Shareholders' equity,Revenue,Cost of revenue,Operating expenses,Net income,Earnings per share,Depreciation & Amortization,Cash generated by operating activities,Cash at beginning of period,Cash at end of period
Google Inc 2008-2,2008,2,15465.0,27605.0,2460.0,24338.0,5186.0,2111.0,3640.0,1307.0,4.12,281.0,1779.0,6082.0,6520.0
Google Inc 2008-3,2008,3,16317.0,29180.0,2346.0,25913.0,5367.0,2148.0,3789.0,1247.0,3.92,589.0,3546.0,6082.0,7364.0
Google Inc 2008-4,2008,4,17963.0,30807.0,2224.0,27475.0,5541.0,2173.0,3894.0,1290.0,4.06,899.0,5731.0,6082.0,8370.0
Google Inc 2009-2,2009,2,22081.0,33513.0,2184.0,29848.0,5509.0,2102.0,3625.0,1423.0,4.49,321.0,2250.0,8657.0,10426.0
Google Inc 2009-3,2009,3,23835.0,35159.0,2001.0,31595.0,5523.0,2108.0,3649.0,1485.0,4.66,632.0,3859.0,8657.0,11911.0
Google Inc 2009-4,2009,4,26354.0,37703.0,2322.0,33722.0,5945.0,2226.0,3871.0,1639.0,5.13,943.0,6585.0,8657.0,12087.0
Google Inc 2010-2,2010,2,31132.0,42871.0,2922.0,38283.0,6775.0,2452.0,4287.0,1955.0,6.06,264.0,2584.0,10198.0,9192.0
Google Inc 2010-3,2010,3,35161.0,48045.0,5747.0,40613.0,6820.0,2467.0,4455.0,1840.0,5.71,530.0,4669.0,10198.0,10713.0
Google Inc 2010-4,2010,4,39447.0,53342.0,8530.0,43290.0,7286.0,2552.0,4739.0,2167.0,6.72,787.0,7555.0,10198.0,11257.0
Google Inc 2011-2,2011,2,43310.0,59960.0,9332.0,48851.0,8575.0,2936.0,6279.0,1798.0,5.51,301.0,3172.0,13630.0,12415.0


In [237]:
# Relabel the Apple frame by position: period, income statement,
# balance sheet, then cash-flow items.
apple_df.columns = [
    'Year', 'Quarter',
    'Net sales', 'Cost of sales', 'Operating expenses', 'Net income', 'Earnings per share',
    'Current assets', 'Total assets', 'Current liabilities', 'Total liabilities',
    "Shareholders' equity",
    'Cash at beginning of period', 'Depreciation & Amortization',
    'Cash generated by operating activities', 'Cash at end of period',
]
display(apple_df)

Unnamed: 0,Year,Quarter,Net sales,Cost of sales,Operating expenses,Net income,Earnings per share,Current assets,Total assets,Current liabilities,Total liabilities,Shareholders' equity,Cash at beginning of period,Depreciation & Amortization,Cash generated by operating activities,Cash at end of period
Apple Inc 2008-1,2008,1,9608.0,6276.0,1206.0,1581.0,1.76,26189.0,30039.0,10535.0,13235.0,16804.0,9352.0,106.0,2787.0,9162.0
Apple Inc 2008-2,2008,2,7512.0,5038.0,1159.0,1045.0,1.16,26736.0,30471.0,9634.0,12418.0,18053.0,9352.0,222.0,3980.0,9070.0
Apple Inc 2008-3,2008,3,7464.0,4864.0,1208.0,1072.0,1.19,27998.0,31709.0,9218.0,12087.0,19622.0,9352.0,339.0,5301.0,9373.0
Apple Inc 2009-1,2009,1,10167.0,6635.0,1406.0,1605.0,1.78,35163.0,42787.0,14757.0,19878.0,22909.0,11875.0,158.0,3938.0,7236.0
Apple Inc 2009-2,2009,2,8163.0,5192.0,1304.0,1205.0,1.33,33853.0,43237.0,13751.0,18926.0,24311.0,11875.0,330.0,4779.0,4466.0
Apple Inc 2009-3,2009,3,8337.0,5314.0,1351.0,1229.0,1.35,35170.0,48140.0,16661.0,22252.0,25888.0,11875.0,506.0,7049.0,5605.0
Apple Inc 2010-1,2010,1,15683.0,9272.0,1686.0,3378.0,3.67,33332.0,53926.0,13097.0,18158.0,35768.0,5263.0,209.0,5781.0,7609.0
Apple Inc 2010-2,2010,2,13499.0,7874.0,1646.0,3074.0,3.33,32336.0,57057.0,12229.0,17709.0,39348.0,5263.0,425.0,8111.0,10018.0
Apple Inc 2010-3,2010,3,15700.0,9564.0,1902.0,3253.0,3.51,36033.0,64725.0,15612.0,21614.0,43111.0,5263.0,698.0,12912.0,9705.0
Apple Inc 2011-1,2011,1,26741.0,16443.0,2471.0,6004.0,6.43,43927.0,86742.0,23795.0,32076.0,54666.0,11261.0,356.0,9773.0,10734.0
