In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
import mplfinance as mpf
import matplotlib.dates as mdates
import pandas_datareader.data as web
from datetime import timedelta
from datetime import datetime
import seaborn as sns
import requests
from bs4 import BeautifulSoup
%matplotlib inline
style.use('ggplot')
import json
from dateutil.parser import parse
import sqlalchemy
import pymysql

#The goal of this project is to automate the scraping, organizing, cleaning, and storing of the balance sheet,
#income statement, and cash flow statement for all publicly traded companies that have filed there 10-k, 20-F, or 40-F

ticker = 'aig'.upper() 
end = datetime.now()
ticker_list = pd.read_excel("/Users/danieldaniel/Companies_list.xlsx")
ticker_list_final = ticker_list.iloc[0:5407,0:3].to_numpy()
if ticker in ticker_list_final:
    
    ticker_url = r"https://www.sec.gov/files/company_tickers.json"
    content = requests.get(ticker_url)
    decoded_ticker_content = content.json() # JSON structure that will turn into a python dictionary.
    
    ticker_content_df = pd.DataFrame(decoded_ticker_content)
    ticker_content_df_clean = ticker_content_df.T

    ID = ticker_content_df_clean[ticker_content_df_clean['ticker']==ticker.upper()].index.values
    ID_num = int(ID)

    cik_num = ticker_content_df_clean.iloc[ID_num, 0]
    cik_num_str = str(cik_num)

    ticker_content_df_clean.iloc[ID_num,:]

    
# PASSING DICT PARAMETERS
    find_no_id = []
    Financial_statement_type = ['10-K','20-F','40-F']
    for file_type in Financial_statement_type:
        endpoint = r"https://www.sec.gov/cgi-bin/browse-edgar"

        param_dict = {'action': 'getcompany',
                      'CIK' : cik_num_str,
                      'type' : file_type, # 40-F Annual reports - Canadian issuers 
                      'dateb' : end,  # Change date to now, use datetime.now() so it refreshes everyday.
                      'owner' : 'exclude',
                      'start': '',
                      'output': '',
                      'count' : '20'}

        response = requests.get(url = endpoint, params = param_dict)
        soup  = BeautifulSoup(response.content, 'html.parser')

    #     print('Request Successful') # Let the user know that it was successful
    #     print(response.url)

    
# SCRAPE AND SAVE FINANCIALS DATA
        doc_table = soup.find_all('table', class_= 'tableFile2') # Pull the whole table: (table class="tableFile2")
        base_url_sec = r"http://www.sec.gov"
        master_list = []
        for row in doc_table[0].find_all('tr'):   # Find all table row and then find all table data. Table data is like columns
            cols = row.find_all('td')
            if len(cols) != 0:  # Only count if cols is not equal to zero, AKA, the column headers row

                filing_type = cols[0].text.strip() # Which file term - 10-K or 10-K/A, 20-F or 20-F/A, 40-F or 40-F/A, etc.
                filing_date = cols[3].text.strip() # When was the file posted, by date.

                filing_doc_href = cols[1].find('a', {'href': True, 'id':'documentsbutton'})    # Pull all documents href
                filing_id_href = cols[1].find('a', {'href': True, 'id':'interactiveDataBtn'}) # Pull all interactive data href

                if filing_doc_href != None:     # if filing_doc_href equals something. It is a double negative
                    filing_doc_link = base_url_sec + filing_doc_href['href']
                else:
                    filing_doc_link = 'no link'

                if filing_id_href != None:
                    filing_id_link = base_url_sec + filing_id_href['href']
                else:
                    filing_id_link = 'no link'

                file_dict = {}
                file_dict['file_type'] = filing_type
                file_dict['file_date'] = filing_date
                file_dict['links'] = {}
                file_dict['links']['documents'] = filing_doc_link
                file_dict['links']['interactive_data'] = filing_id_link

    #             print('-'*100)
    #             print('Filing Type: ' + filing_type)
    #             print('Filing Date: ' + filing_date)
    #             print('Document Link: ' + filing_doc_link)
    #             print('Interactive Data Link: ' + filing_id_link)

                master_list.append(file_dict)
                find_no_id.append(filing_id_link)


    all_xml_forms = []
    Financial_statement_type = ['10-K','20-F','40-F']
    for file_type in Financial_statement_type:
        endpoint = r"https://www.sec.gov/cgi-bin/browse-edgar"

        param_dict = {'action': 'getcompany',
                      'CIK' : cik_num_str,
                      'type' : file_type, # 40-F Annual reports - Canadian issuers 
                      'dateb' : end,  # Change date to now, use datetime.now() so it refreshes everyday.
                      'owner' : 'exclude',
                      'start': '',
                      'output': 'atom',
                      'count' : '100'}
        ##
        response = requests.get(url = endpoint, params = param_dict)
        soup  = BeautifulSoup(response.content, 'lxml')

    #     print('Request Successful') # Let the user know that it was successful
    #     print(response.url)

        entries = soup.find_all('entry')

        master_list_xml = []

        for entry in entries[0:10]:  #Only include years with 'act' variable in xml structure: entries[0:17]

            accession_num = entry.find('accession-number').text

            entry_dict = {}
            entry_dict[accession_num] = {}

            category_info = entry.find('category')
            entry_dict[accession_num]['category'] = {}
            entry_dict[accession_num]['category']['label'] = category_info['label']
            entry_dict[accession_num]['category']['scheme'] = category_info['scheme']
            entry_dict[accession_num]['category']['term'] = category_info['term'] # Which file term - 10-K or 10-K/A, 20-F or 20-F/A, 40-F or 40-F/A, etc.

            entry_dict[accession_num]['file_info'] = {}
        #     entry_dict[accession_num]['file_info']['act'] = entry.find('act').text 
            # There is no 'act' variable in the xml structure until 2005 for all companies, so an error will follow by running 
            # this cell. Must not include act variable to get files for all of the years.
            entry_dict[accession_num]['file_info']['file_number'] = entry.find('file-number').text
            entry_dict[accession_num]['file_info']['file_number_href'] = entry.find('file-number-href').text
            entry_dict[accession_num]['file_info']['filing_date'] = entry.find('filing-date').text
            entry_dict[accession_num]['file_info']['filing_href'] = entry.find('filing-href').text
            entry_dict[accession_num]['file_info']['filing_type'] = entry.find('filing-type').text
            entry_dict[accession_num]['file_info']['form_number'] = entry.find('film-number').text
            entry_dict[accession_num]['file_info']['form_name'] = entry.find('form-name').text
            entry_dict[accession_num]['file_info']['file_size'] = entry.find('size').text

            entry_dict[accession_num]['request_info'] = {}
            entry_dict[accession_num]['request_info']['link'] = entry.find('link')['href']
            entry_dict[accession_num]['request_info']['title'] = entry.find('title').text
            entry_dict[accession_num]['request_info']['last_updated'] = entry.find('updated').text  

            master_list_xml.append(entry_dict)

    #         print('-'*100)
    #         print(entry.find('category')['term'])
    #         print(entry.find('updated').text[0:-1]) # only print up until the date, not the time of the day.
    #     ##    print(entry.find('form-name').text)
    #     ##    print(entry.find('file-number').text)
    #     ##    print(entry.find('file-number-href').text)
    #         print(entry.find('link')['href'])
            all_xml_forms.append(entry.find('link')['href'])

    find_no_id
    indices_html = [index for index, element in enumerate(find_no_id) if element == 'no link'] # The count starts at zero.
    # print('List of indices with no links: ', indices_html)

    look = [x.split('-')[1] for x in all_xml_forms]
    # print('Years in each link: ',look)

    def Repeat(x): 
        size = len(x) 
        repeated = [] 
        for i in range(size): 
            k = i + 1
            for j in range(k, size): 
                if x[i] == x[j] and x[i] not in repeated: 
                    repeated.append(x[i]) 
        return repeated 

    repeat = Repeat(look)
    # print('Which year or years are repeated: ',repeat)

    repeat_1 = ''.join(map(str,repeat))
    n = 2
    repeat_list = [repeat_1[i:i+n] for i in range(0, len(repeat_1), n)]

    indices_xml = [[i for i in range(len(look)) if look[i] == r] for r in repeat_list]
    indices_xml_new = [x for sublist in indices_xml for x in sublist]
    # print('List of indices of years repeated: ',indices_xml_new)

    combined_indices = sorted(indices_xml_new + indices_html)
    # print('List of indices combined: ',combined_indices)

    drop_indices = Repeat(combined_indices)
    # print('Which indices to drop in our combined list: ',drop_indices)

    iterable = iter(indices_xml_new)
    drop = []
    for x in iterable:
        drop_it = min(x, next(iterable))
        drop.append(drop_it)
    # print(drop)
    final_links = [i for j, i in enumerate(all_xml_forms) if j not in drop]
    
    # All of this code above only drops the urls that have no interactive data links with repeated years as well, but
    # I must decide how to show companies that have no interactive data for all of the years.
    pic_a_year = final_links[0:1]

    listToStr = ' '.join(map(str, pic_a_year))

    listToStr_clean = "/".join(listToStr.split("/")[:-1])

    files_url = listToStr_clean + "/index.json"

    base_url = r"http://www.sec.gov"

    documents_url = files_url

    content = requests.get(documents_url).json()

    for file in content['directory']['item']:

        if file['name'] == 'FilingSummary.xml':

            xml_summary = base_url + content['directory']['name'] + '/' + file['name']

    #         print('-'*100)
    #         print('File Name: ' + file['name'])
    #         print('File Path: ' + xml_summary)

    base_url = xml_summary.replace('FilingSummary.xml','')
    # Replace FilingSummary.xml with a blank space-Output: http://www.sec.gov/Archives/edgar/data/1318605/000156459019003165/
    content = requests.get(xml_summary).content

    soup = BeautifulSoup(content, 'lxml')

    reports = soup.find('myreports')

    short_list = []

    master_reports = []

    for report in reports.find_all('report')[0:20]: # find all reports in 'myreports' except for the last 'myreport'

        report_dict = {}
        report_dict['name_short'] = report.shortname.text
        report_dict['name_long'] = report.longname.text
        report_dict['position'] = report.position.text
        report_dict['category'] = report.menucategory.text
        report_dict['url'] = base_url + report.htmlfilename.text

        master_reports.append(report_dict)
        short_list.append(report_dict['name_short'])
    #     print('-'*100)
    #     print(base_url + report.htmlfilename.text)
    #     print(report.longname.text)
    #     print(report.shortname.text)
    #     print(report.menucategory.text)
    #     print(report.position.text)


    item1 = ''
    item2 = ''
    item3 = ''
    item4 = ''
    state = []
    statement_url = []
    for report_dict in master_reports:
        for short in short_list[0:20]:
            if short in ['Consolidated Statements of Income and of Comprehensive Income','Combined Statements of Profit or Loss and Other Comprehensive Income','Consolidated Statements of Net Earnings (Loss)','CONSOLIDATED STATEMENTS OF LOSS (PROFIT) AND OTHER COMPREHENSIVE LOSS (PROFIT)','CONSOLIDATED STATEMENTS OF EARNINGS AND COMPREHENSIVE EARNINGS','Consolidated Statement of Loss and Comprehensive Income (Loss)','CONSOLIDATED STATMENTS OF OPERATIONS','Consolidated Statements of Earnings and Comprehensive Income (Loss)','Consolidated Statements of Net Earnings and Comprehensive (Loss) Income','Consolidated statements of loss and comprehensive loss','Amended and Restated Consolidated Statements of Loss and Comprehensive Loss','Consolidated Statements of Earnings (Loss) Statement','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE EARNINGS (LOSS)','Consolidated Statments of Earnings','Consolidated statements of earnings','Consolidated statements of income and other comprehensive income','Consolidated statements of income and comprehensive income','COMBINED AND CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME (LOSS)','CONSOLIDATED STATEMENT OF OPERATIONS AND COMPREHENSIVE INCOME (LOSS)','Statements of income','Consolidated Statements Of Profit Or Loss And Other Comprehensive Income','CONSOLIDATED STATEMENT OF PROFIT OR LOSS AND OTHER COMPREHENSIVE INCOME','CONSOLIDATED STATEMENTS OF COMPREHENSIVE (LOSS) /INCOME','Consolidated statements of income (loss) and comprehensive income (loss)','Consolidated Statements of Operations and Comprehensive Loss (Successor Basis)','CONSOLIDATED AND COMBINED STATEMENTS OF PROFIT OR LOSS AND OTHER COMPREHENSIVE INCOME','Consolidated Statements of Income/(Loss) and Comprehensive Income/(Loss)','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE (LOSS)/INCOME','CONSOLIDATED STATEMENTS OF (LOSS)/INCOME','Consolidated Statements of Operations and Comprehensive Income (loss)','CONSOLIDATED STATEMENTS OF PROFIT AND LOSS AND OTHER COMPREHENSIVE INCOME','COMBINED AND CONSOLIDATED STATEMENTS OF OPERATIONS','Statements of Income/(Loss)','Consolidated statement of operations','Consolidated Statement of Income (Loss)','Consolidated Statements of Loss and Other Comprehensive Loss','Consolidated Statement of Profit or Loss and Comprehensive Income','Consolidated Statements of Profit & Loss','Statements of Loss and Comprehensive Loss','Consolidated Statement of Operations and Comprehensive Loss','Consolidated Statement of Profit or Loss and Other Comprehensive Income / (Loss)','Consolidated statements of profit or loss and total comprehensive income or loss','CONSOLIDATED STATEMENTS OF PROFIT OR LOSS AND OTHER COMPREHENSIVE INCOME OR LOSS','Consolidated Statements of Income/(Loss)','CONSOLIDATED STATEMENTS OF OPERATIONS','Consolidated statement of profit or loss and other comprehensive loss','CONSOLIDATED STATEMENTS OF PROFIT AND OTHER COMPREHENSIVE INCOME','Consolidated Statement of Profit or Loss and Other Comprehensive Income (Loss)','Statements of Earnings and Comprehensive Loss (Twelve Month and Month Ended 2017 Unaudited)','CONSOLIDATED STATEMENTS OF PROFIT AND LOSS','Consolidated Statements of Profit and Loss','Consolidated Statemenets of Operations','Consolidated statements of net income (loss)','CONSOLIDATED STATEMENTS OF OPERATING RESULTS','Consolidated statement of profit or loss and other comprehensive income','Consolidated Statements of Comprehensive (Loss) /  Income','Consolidated Statements of Operations and Comprehensive Loss (Income)','Group Income Statement','Consolidated Statement of Profit (Loss) and Other Comprehensive Income','CONSOLIDATED STATEMENTS OF INCOME AND COMPREHENSIVE INCOME/(LOSS)','Consolidated Statements of Profit or Loss and Other Comprehensive Loss','Consolidated Statements of Loss and Comprehensive Loss (Income)','Statements of Profit or Loss','Consolidated income statements','Consolidated statements of profit or loss and other comprehensive income','Statement of profit or loss','CONSOLIDATED STATEMENT OF INCOME BY FUNCTION','Consolidated Statements of Profit or Loss','Consolidated Statements of Profit and Loss and Other Comprehensive Income','Consolidated statement of profit or loss','Consolidated Statement of Profit or Loss and Other Comprehensive Income','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE (LOSS) / INCOME','Group Income Statements','CONSOLIDATED PROFIT AND LOSS ACCOUNT','CONSOLIDATED STATEMENTS OF PROFIT OR LOSS AND OTHER COMPREHENSIVE INCOME','Consolidated Statements of Profit or Loss and Other Comprehensive Income (Loss)','Consolidated Statement of Profit or Loss','CONSOLIDATED STATEMENT OF PROFIT OR LOSS','Consolidated statements of profit or loss','GROUP INCOME STATEMENT','Financial Statements - Consolidated Income Statements','CONSOLIDATED STATEMENTS OF PROFIT OR LOSS','Consolidated income statement of Aegon N.V.','Consolidated Statements of Profit or Loss and Other Comprehensive Income','Income statements','Consolidated income statement','Income Statement','Group income statement','Consolidated Statements of Income and Other Comprehensive Income','Consolidated Statements of Earnings (Losses)','Combined Statements of Operations and Comprehensive Loss','Statement of Operations and Comprehensive Income','Statements of Operations and Comprehensive (Loss) Income','CONSOLIDATED STATEMENTS OF INCOME (Unaudited)','Combined and Consolidated Statements of Operations','Consolidated and Combined Consolidated Statements of Operations','Consolidated and Combined Statements of Income','Consolidated Statements of (Loss)','CONDENSED CONSOLIDATED STATEMENTS OF (LOSS) EARNINGS','Condensed Statements of Operations and Comprehensive Loss','COMBINED AND CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE INCOME (LOSS)','Statements of Operations and Comprehensive Income','Consolidated & Combined Statements of Operations','Statement of Operations','CONSOLIDATED COMBINED STATEMENTS OF OPERATIONS','Condensed Consolidated and Combined Statements of Income','Condensed Interim Statements of Operations','Consolidated and Combined Statements of Operations and Comprehensive Income','Consolidated and Combined Statements of Operations and Comprehensive Loss','Statements of Operation and Comprehensive Loss','CONSOLIDATED AND COMBINED STATEMENT OF OPERATIONS','CONSOLIDATED AND COMBINED STATEMENTS OF OPERATIONS','Consolidated and Combined Statements of Income (Loss)','Consolidated and Combined Statements of Operations','Consolidated and Combined Statements of Operations and Comprehensive (Loss) Income','Consolidated and Combined Statements of (Loss) Income','Consolidated Statements of Earnings and Comprehensive Earnings (Loss)','Consolidated Statements of Operations and Comprehensive Loss (Unaudited)','Consolidated statements of income','Condensed Consolidated Statements of Operations and Comprehensive Loss (Unaudited)','CONSOLIDATED STATEMENTS OF LOSS AND COMPREHENSIVE LOSS','Consolidated Statements of Operations and Comprehensive Earnings','COMBINED CONSOLIDATED STATEMENTS OF OPERATIONS','CONDENSED STATEMENTS OF OPERATIONS','Combined Statements of Operations','Consolidated statements of operations and comprehensive income','STATEMENTS OF INCOME','Consolidated Statements of Operations Statement','CONSOLIDATED STATEMENT OF INCOME (LOSS) AND COMPREHENSIVE INCOME (LOSS)','Consolidated Statements of (Loss) Income and Comprehensive (Loss) Income','CONSOLIDATED STATEMENTS OF NET AND COMPREHENSIVE INCOME','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE INCOME (HD Supply, Inc.)','Statement of Consolidated Operations and Comprehensive Loss','STATEMENT OF OPERATIONS AND COMPREHENSIVE LOSS','CONDENSED CONSOLIDATED STATEMENT OF OPERATIONS (unaudited)','CONSOLIDATED STATEMENTS OF INCOME AND COMPREHENSIVE INCOME (LOSS)','Condensed Consolidated Statements of Operations and Comprehensive Income','Condensed Statements of Operations','Consolidated Statements of Net Loss','Condensed Consolidated Statements of Operations and Comprehensive Income (Loss)','Statements of operations','Condensed Consolidated Statement of Operations','CONDENSED CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE INCOME','Condensed Consolidated Statements of Loss and Comprehensive Loss','Consolidated statements of operations and comprehensive loss','FIRST FINANCIAL NORTHWEST, INC. AND SUBSIDIARIES CONSOLIDATED INCOME STATEMENTS','Condensed Consolidated Statements of Operations and Comprehensive Loss','Statements Of Income','Consolidated Statements Of Income (Loss)','Consolidated Statements of Earnings (Loss) and Comprehensive Income (Loss)','Consolidated Statements of Income and Expenses (Unaudited)','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE OPERATIONS (Unaudited)','CONSOLIDATED STATEMENTS OF INCOME/(LOSS)','Consolidated Statements of Operations and Other Comprehensive Loss','Consolidated statements of operations','Statements of Income and Expenses','CONSOLIDATED STATEMENTS OF OPERATIONS Statement','Consolidated Statements of Earnings and Other Comprehensive Income','Unaudited Condensed Statements of Operations','CONSOLIDATED STATEMENTS OF LOSS','Statement Of Operations','Consolidated Statements of Operations and Other Comprehensive (Loss) Income','Consolidated Income Statements (Unaudited)','CONSOLIDATED STATEMENTS OF EARNINGS (LOSS) AND COMPREHENSIVE ( LOSS) INCOME','CONSOLIDATED STATEMENTS OF INCOME AND COMPREHENSIVE LOSS','Consolidated Statements of Income or Loss','Consolidated Condensed Statements of Income (Operations) (Unaudited)','CONSOLIDATED CONDENSED STATEMENTS OF INCOME','CONSOLIDATED STATEMENTS OF OPERATIONS (LOSS)','Audited Consolidated Statements of Operations and Comprehensive Income (Loss)','Statements Of Consolidated Income (Loss)','Consolidated Statements of Loss','Consolidated Statements of Earnings and Comprehensive (Loss) Income','Consolidated Statements Of Income (Loss) And Comprehensive Income (Loss)','Consolidated Statements of (Loss)/Income and Comprehensive (Loss)/Income','Consolidated Statements of Operation and Comprehensive Loss','Statements of Operations And Comprehensive Loss','Consolidated Statement of income','Consolidated Statements of Operations And Comprehensive Loss','Consolidated Statements of Income Statement','Consolidated Statement of Operations and Comprehensive Income (Loss)','Consolidated Statements Of Income and Comprehensive Income','Statements of Operations and Comprehensive Loss','Consolidated Statements of Income and Other Comprehensive Income (Loss)','CONSOLIDATED STATEMENTS OF EARNINGS AND COMPREHENSIVE INCOME','CONDENSED CONSOLIDATED STATEMENT OF INCOME','Consolidated Statements of (Loss) / Income','CONSOLIDATED STATEMENTS OF OPERATIONS CONSOLIDATED STATEMENTS OF OPERATIONS','Consolidated Statements of (Loss) Income and Comprehensive Income','Unaudited Condensed Consolidated Statements of Operations','CONSOLIDATED STATEMENTS OF OPERATIONS AND STATEMENTS OF OTHER COMPREHENSIVE INCOME/(LOSS)','CONSOLIDATED STATEMENTS OF EARNINGS, COMPREHENSIVE INCOME AND RETAINED EARNINGS','CONDENSED CONSOLIDATED STATEMENTS OF EARNINGS AND COMPREHENSIVE INCOME','Consolidated Statements Of (Loss) Earnings','BorgWarner Inc. and Consolidated Subsidiaries Consolidated Statements of Operations','STATEMENTS OF OPERATIONS AND COMPREHENSIVE LOSS','Consolidated Statements Of Operations and Comprehensive Income','Condensed Consolidated Statements of Operations (Unaudited)','CONSOLIDATED STATEMENTS OF INCOME AND OTHER COMPREHENSIVE INCOME','CONSOLIDATED STATEMENT OF OPERATIONS AND COMPREHENSIVE INCOME','UNAUDITED CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE LOSS','JEWETT-CAMERON TRADING COMPANY LTD. AND SUBSIDIARIES CONSOLIDATED STATEMENTS OF OPERATIONS','CONSOLIDATED STATEMENTS OF (LOSS) EARNINGS','CONSOLIDATED STATEMENTS OF (LOSS) INCOME AND COMPREHENSIVE (LOSS) INCOME','CONSOLIDATED STATEMENTS OF INCOME (LOSS) AND COMPREHENSIVE INCOME (LOSS)','Consolidated Statements of Loss and Comprehensive Loss','Consolidated Statements of Operations and Comprehensive Income/(Loss)','STATEMENTS OF OPERATIONS AND COMPREHENSIVE INCOME (LOSS)','STATEMENT OF OPERATIONS','Consolidated Statements of Operations and Other Comprehensive Income (Loss)','CONDENSED STATEMENTS OF OPERATIONS AND COMPREHENSIVE LOSS','CONSOLIDATED STATEMENTS OF OPERATIONS (Unaudited)','Consolidated Statements of Net Income','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE INCOME OR LOSS','CONSOLIDATED STATEMENTS OF OPERATIONS Consolidated Statement of Operations','Consolidated Statements of (Loss) Income','Carnival Corporation & PLC Consolidated Statements of Income','Consolidated Statements Of Operations And Comprehensive Loss','CONSOLIDATED STATEMENTS OF OPERATIONS AND OTHER COMPREHENSIVE INCOME (LOSS)','CONSOLIDATED AND COMBINED STATEMENTS OF INCOME','Consolidated Statements of Income (Loss) and Comprehensive Income (Loss)','CONDENSED CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE LOSS','Consolidated Statements of Net Income and Comprehensive Income','Consolidated Statements of Operations and Comprehensive Income / (Loss)','Consolidated Statements Of Operations And Comprehensive Income','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE (LOSS) INCOME','Consolidated Statements of Income and Total Comprehensive Income','Consolidated Statements of Operations and Comprehensive Earnings and Loss','Consolidated Statements of Operations and Comprehensive Income','Condensed Consolidated Statements of Earnings and Comprehensive Earnings','Consolidated Statements of Operations and Comprehensive (Loss) Income','Consolidated Statements of Income and Comprehensive Income (Loss)','Consolidated Statements Of Income And Other Comprehensive Income','Condensed Consolidated Income Statements Of Income and Comprehensive Income','Consolidated Statements Of Income And Comprehensive Income','CONSOLIDATED STATEMENT OF INCOME AND COMPREHENSIVE INCOME','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE INCOME (LOSS)','CONSOLIDATED STATEMENTS OF INCOME AND COMPREHENSIVE INCOME','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE INCOME','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE INCOME (LOSS) (Audited)','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE LOSS','Consolidated Statements of Operations and Comprehensive Income (Loss)','Consolidated Statements of Earnings and Comprehensive Earnings','Consolidated Statements Of Operations And Comprehensive Income (Loss)','Consolidated Statements of Earnings and Comprehensive Income','Consolidated Statements of Operations and Comprehensive Loss','Consolidated Statements of Income and Comprehensive Income','CONDENSED CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE INCOME (LOSS)','Statements of Earnings','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE (LOSS)','STATEMENTS OF OPERATIONS AND COMPREHENSIVE INCOME','Consolidated Statements Of Operations And Comprehensive (Loss) Income','CONSOLIDATED STATEMENTS OF INCOME/(LOSS) AND COMPREHENSIVE INCOME/(LOSS)','CONSOLIDATED STATEMENTS OF OPERATIONS & COMPREHENSIVE INCOME','Consolidated Statements of Net Loss and Comprehensive Loss','Statements Of Condensed Consolidated Operations','Consolidated Statements of Operation','CONDENSED CONSOLIDATED STATEMENTS OF INCOME','CONSOLIDATED STATEMENTS OF OPERATIONS AND COMPREHENSIVE INCOME (LOSS)','CONSOLIDATED STATEMENTS OF NET (LOSS) INCOME','UTAH MEDICAL PRODUCTS, INC. CONSOLIDATED STATEMENT OF INCOME','CONSOLIDATED STATEMENTS OF OPERATIONS (in thousands, except per share data)','Condensed Consolidated Statements of Income','STATEMENT OF CONSOLIDATED INCOME','Statements Of Operations','CONSOLIDATED STATEMENT OF OPERATIONS','CONSOLIDATED STATEMENTS OF NET INCOME','Statements of Consolidated Operations','Consolidated Statements of Operations Consolidated Statements of Operations','CONSOLIDATED STATEMENTS OF EARNINGS AND RETAINED EARNINGS','CONSOLIDATED RESULTS OF OPERATIONS','Consolidated Statements of Net Earnings','CONSOLIDATED STATEMENT OF INCOME (LOSS)','CONDENSED CONSOLIDATED STATEMENTS OF OPERATIONS','Consolidated Statements Of Net Income','Consolidated Statements Of Income And Earnings Reinvested In The Business','Statements of Consolidated Income','Statements of Income','Consolidated Statement of Income Statement','CONSOLIDATED STATEMENTS OF INCOME AND RETAINED EARNINGS','Consolidated Income Statement','CONSOLIDATED STATEMENTS OF OPERATIONS (Audited)','Consolidated Statements of Operations (Unaudited)','Statement of Income','STATEMENTS OF CONSOLIDATED INCOME','Consolidated Statement of Income Consolidated Statement of Income','STATEMENT OF EARNINGS (LOSS)','CONSOLIDATED INCOME STATEMENT','Consolidated Statement of Income','Consolidated Statement Of Income','STATEMENTS OF CONSOLIDATED OPERATIONS','STATEMENTS OF OPERATIONS','STATEMENTS OF EARNINGS','CONSOLIDATED STATEMENT OF INCOME','INCOME STATEMENTS','Consolidated statement of income','Consolidated Statement of Operations','CONSOLIDATED STATEMENTS OF (LOSS) INCOME','Consolidated Statement Of Operations','CONSOLIDATED STATEMENT OF EARNINGS','Consolidated Statement Of Income (Loss)','Consolidated Statements of Income (Loss)','Consolidated Statements of Earnings (Loss)','CONSOLIDATED STATEMENTS OF EARNINGS','Condensed Statements of Income','Statements Of Consolidated Earnings','Consolidated Statements Of Income','Consolidated Statements Of Earnings','CONSOLIDATED INCOME STATEMENTS','Consolidated Results of Operations','STATEMENT OF CONSOLIDATED OPERATIONS','Consolidated Statements of Earnings','Statement of Consolidated Income','Statement of Consolidated Operations','Consolidated Statements Of Operations','CONSOLIDATED STATEMENTS OF INCOME (LOSS)','Consolidated Statements of Income','Consolidated Income Statements','Condensed Consolidated Statements of Operations','Statements of Operations','CONSOLIDATED STATEMENTS OF OPERATIONS','Consolidated Statements of Operations','CONSOLIDATED STATEMENTS OF INCOME','Consolidated Statement of Earnings','Condensed Consolidated Statements of Income (Unaudited)']:
                item1 = (r"{}").format(short)
            if short in ['CONDENSED CONSOLIDATED STATEMENTS OF COMPREHENSIVE LOSS','Statements of comprehensive income (loss)','Consolidated Statements of Comprehensive Income/(Loss) (Details)','Statement of Net and Comprehensive Loss','Consolidated Statements of Comprehensive (Loss)/Income','COMBINED AND CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME','CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME (Details)','Consolidated Statements Of Comprehensive Loss','Consolidated Statement of Comprehensive Income/(Loss)','Consolidated Statements of Comprehensive Income /(Loss)','Consolidated statement of comprehensive loss','COMBINED STATEMENTS OF COMPREHENSIVE INCOME','Consolidated Statement of Comprehensive Loss','Statements of Comprehensive Loss','CONSOLIDATED STATEMENTS OF COMPREHENSIVE (LOSS)/INCOME','Consolidated Statements of Comprehensive Income/ (Loss)','CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME/ (LOSS)','CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME/(LOSS)','Statement of Comprehensive Income/(Loss)','Consolidated statement of comprehensive income','CONSOLIDATED STATEMENT OF COMPREHENSIVE INCOME (LOSS)','CONSOLIDATED COMPREHENSIVE INCOME STATEMENT','Consolidated Statements of Comprehensive Income, by Nature','Consolidated statements of comprehensive income','Statements of Consolidated Comprehensive Income','Statements of Consolidated Comprehensive Income (Loss)','Consolidated Of  Statements of Comprehensive Income','CONSOLIDATED STATEMENT OF COMPREHENSIVE LOSS','CONSOLIDATED STATEMENTS OF COMPREHENSIVE (LOSS) INCOME','Consolidated statements of comprehensive loss','Condensed Consolidated Statements of Comprehensive Income (Unaudited)','CONSOLIDATED STATEMENT OF COMPREHENSIVE INCOME','Condensed Statements of Comprehensive Loss','Statements of Comprehensive Income (Loss)','STATEMENTS OF COMPREHENSIVE LOSS','Consolidated Statements of Comprehensive Earnings','UNAUDITED CONDENSED CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME (LOSS)','Condensed Consolidated Statements of Comprehensive Income','Consolidated Statements of Comprehensive Income (Unaudited)','Consolidated Statements of Comprehensive Loss','Consolidated Statements of Total Comprehensive Income','Consolidated Statement of Comprehensive Income (Loss)','Consolidated Statements of Comprehensive Income (Loss) (Unaudited)','Consolidated Statement Of Comprehensive Income (Loss)','CONSOLIDATED STATEMENTS OF COMPREHENSIVE LOSS','Statements Of Comprehensive Income (Loss)','Consolidated Statement of Comprehensive Income','Consolidated Statements Of Comprehensive Income (Loss)','Consolidated Comprehensive Income Statements','CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME (LOSS)','Consolidated Statements Of Comprehensive Income','Consolidated Statements of Comprehensive (Loss) Income','CONSOLIDATED STATEMENTS OF COMPREHENSIVE INCOME','Consolidated Statements of Comprehensive Income (Loss)','Statements of Comprehensive Income','Consolidated Statements of Comprehensive Income','Consolidated Statements of Comprehensive (Loss)']:
                item2 = (r"{}").format(short) #all statement that have income and comprehensive. Add to item1. First one is an example.
            if short in ['Combined Statements of Financial Position','CONDENSED STATEMENTS OF FINANCIAL POSITION','Amended and Restated Consolidated Statements of Financial Position','Consolidated Statements of Balance Sheets','Consolidated Statements of Financial Position Statement','Statements of financial position','COMBINED AND CONSOLIDATED BALANCE SHEETS','Consolidated Balance Sheets (Successor Basis)','STATEMENTS OF CONSOLIDATED BALANCE SHEETS','COMBINED STATEMENTS OF FINANCIAL POSITION','Statements of Consolidated Financial Position','Consolidated Statetments of Financial Position','Consolidated Statement Of Financial Position','Group Balance Sheet','Statements of Financial Position','Group Balance Sheets','Consolidated statements of financial position','CONSOLIDATED STATEMENT OF FINANCIAL POSITION','Group statement of financial position','GROUP BALANCE SHEET','Financial Statements - Balance sheets','Consolidated statement of financial position','Consolidated statement of financial position of Aegon N.V.','Statement of financial position','Group balance sheet','Combined Balance Sheets','CONDENSED BALANCE SHEET','Combined and Consolidated Balance Sheets','Consolidated and Combined Consolidated Balance Sheets','Condensed Balance Sheet','Condensed Consolidated and Combined Balance Sheets','CONSOLIDATED AND COMBINED BALANCE SHEETS','Consolidated and Combined Balance Sheets','COMBINED CONSOLIDATED BALANCE SHEETS','Combined Statements of Financial Condition','Combined Statements of Assets and Liabilities','CONSOLIDATED STATEMENT OF FINANCIAL CONDITION','CONSOLIDATED BALANCE SHEETS (HD Supply Holdings, Inc.)','Condensed Statements of Financial Condition','STATEMENTS OF ASSETS AND LIABILITIES','Balance sheets','Condensed Consolidated Balance Sheet','FIRST FINANCIAL NORTHWEST, INC. AND SUBSIDIARIES CONSOLIDATED BALANCE SHEETS','Consolidated balance sheets','Statement of Financial Condition','CONSOLIDATED BALANCE SHEETS Statement','Statements of Assets and Liabilities','Balance Sheet','Statement - Consolidated Condensed Balance Sheets (Unaudited)','CONSOLIDATED CONDENSED BALANCE SHEETS','Consolidated Balance Sheets, as of December 31','CONDENSED CONSOLIDATED BALANCE SHEET','Consolidated Condensed Statements of Financial Condition','Unaudited Condensed Consolidated Balance Sheets','STATEMENTS OF FINANCIAL POSITION','BorgWarner Inc. and Consolidated Subsidiaries Consolidated Balance Sheets','Consolidated Balance Sheets Consolidated Balance Sheets','Condensed Consolidated Balance Sheets (Unaudited)','Consolidated Balance Sheets Statement','Consolidated Balance Sheets (Unaudited)','JEWETT-CAMERON TRADING COMPANY LTD. AND SUBSIDIARIES CONSOLIDATED BALANCE SHEETS','BALANCE SHEET','CONDENSED BALANCE SHEETS','Statements of Financial Condition','Carnival Corporation & PLC Consolidated Balance Sheets','Consolidated Balance Sheets (Current Period Unaudited)','Statements Of Condensed Consolidated Financial Position','Consolidated Statement of Financial Condition','Consolidated Statements Of Condition','UTAH MEDICAL PRODUCTS, INC. CONSOLIDATED BALANCE SHEET','Audited Consolidated Balance Sheets','CONSOLIDATED BALANCE SHEETS (in thousands, except share data)','Consolidated Statements Of Financial Condition','Consolidated Statement of Condition','CONDENSED CONSOLIDATED BALANCE SHEETS','CONSOLIDATED STATEMENTS OF FINANICAL POSITION','Consolidated Balance Sheet Statement','Condensed Consolidated Balance Sheet & Mini Balance Sheet','Statement of Financial Position','Consolidated Statements of Condition','Consolidated Balance Sheet & Mini Balance Sheet','STATEMENT OF FINANCIAL POSITION','CONSOLIDATED BALANCE SHEETS (Audited)','CONSOLIDATED STATEMENTS OF CONDITION','CONSOLIDATED STATEMENTS OF FINANCIAL CONDITION','BALANCE SHEETS','Consolidated balance sheet','Consolidated Statement of Financial Position','Consolidated Statements of Financial Position','Consolidated Statements of Financial Condition','Consolidated Financial Position','CONSOLIDATED STATEMENTS OF FINANCIAL POSITION','Condensed Balance Sheets','Consolidated Statements Of Financial Position','CONSOLIDATED BALANCE SHEET','Condensed Consolidated Balance Sheets','Balance Sheets','CONSOLIDATED BALANCE SHEETS','Consolidated Balance Sheets','Consolidated Balance Sheet']:
                item3 = (r"{}").format(short)
            if short in ['Consolidated Statements of Cash Flows (Details)','Consolidated Statements of Cashflows','Amended and Restated Consolidated Statements of Cash Flows','Condensed consolidated statement of cash flows','Statements of cash flows','Consolidated Statements of Cash Flows (Successor Basis)','CONSOLIDATED STATEMENTS OF CASHFLOWS','COMBINED STATEMENTS OF CASH FLOWS','Consolidated Statement of Cash Flows Consolidated Statement of Cash Flows','Cash Flows','Statements of Cash Flows (Twelve Month and Month Ended 2017 Unaudited)','Consolidated Statemenets of Cash Flows','Statements of Cash Flow','Consolidated Statements Of Cash Flows','Group Cash Flow Statement','CONSOLIDATED CASH FLOW STATEMENT','Statements of Cash Flows-Indirect Method','Consolidated cash flow statements','CONSOLIDATED STATEMENT OF CASH FLOWS DIRECT - METHOD','Group Cash Flow Statements','CONSOLIDATED CASH FLOWS STATEMENT','Consolidated Statements of Cash Flows, Direct','Consolidated statements of cash flow','CONSOLIDATED STATEMENT OF CASH FLOW','Consolidated statement of cash flow','Group Statements of cash flows','GROUP CASH FLOW STATEMENT','Financial Statements - Statements of cash flows','Consolidated cash flow statement of Aegon N.V.','Cash flow statements','Statement of Cash Flows in the Consolidated Group','Statement of cash flows','Group cash flow statement','Consolidated cash flow statement','CONDENSED STATEMENT OF CASH FLOWS','Combined and Consolidated Statements of Cash Flows','Consolidated and Combined Consolidated Statements of Cash Flows','Condensed Statement of Cash Flows','CONSOLIDATED AND COMBINED STATEMENT OF CASH FLOW','Consolidated and Combined Statement of Cash Flows','COMBINED AND CONSOLIDATED STATEMENTS OF CASH FLOWS','Consolidated & Combined Statements of Cash Flows','CONSOLIDATED COMBINED STATEMENTS OF CASH FLOWS','Condensed Consolidated and Combined Statements of Cash Flows','Condensed Interim Statement of Cash Flows','Consolidated and Combined Statements of Cash Flows (Unaudited)','CONSOLIDATED AND COMBINED STATEMENT OF CASH FLOWS','Consolidated Statements of Cash flows','CONSOLIDATED STATEMENTS OFCASH FLOWS','STATEMENTS OF CASH FLOW','Consolidated Statements of Cash Flows (unaudited)','Combined Statements of Cash Flows','Consolidated and Combined Statements of Cash Flows','Condensed Consolidated Statements of Changes in Cash Flows','Condensed Consolidated Statements of Cash Flow (Unaudited)','CONSOLIDATED STATEMENTS OF CHANGES OF CASH FLOWS','CONSOLIDATED STATEMENTS OF CASH FLOWS (HD Supply Holdings, Inc.)','CONDENSED CONSOLIDATED STATEMENTS OF CASH FLOWS (Unaudited)','CONSOLIDATED STATEMENTS OF CASH FLOWS, (Unaudited)','Statements of Cash Flows Statement','CONSOLIDATED CASH FLOWS STATEMENTS','Statements of cash flows','FIRST FINANCIAL NORTHWEST, INC. AND SUBSIDIARIES CONSOLIDATED STATEMENTS OF CASH FLOWS','Statements of Cash Flows (Unaudited)','Consolidated statements of cash flows','CONSOLIDATED STATEMENTS OF CASH FLOWS Statement','Consolidated statements of Cash Flows','Statement Of Cash Flows','CONSOLIDATED CONDENSED STATEMENTS OF CASH FLOWS','Consolidated Statements of Cash Flow Statement','Consolidated Statments of Cash Flows','Audited Consolidated Statements of Cash Flows','Consolidated Statements Of Changes Of Cash Flows','CONDENSED CONSOLIDATED STATEMENT OF CASH FLOWS','UNAUDITED CONDENSED CONSOLIDATED STATEMENTS OF CASH FLOWS','Consolidated Cash Flows Statements','BorgWarner Inc. and Consolidated Subsidiaries Consolidated Statements of Cash Flows','CONSOLIDATED STATEMENTS OF CASH FLOWS (in thousands)','UNAUDITED CONSOLIDATED STATEMENTS OF CASH FLOWS','Consolidated Statements Of Cash Flow','JEWETT-CAMERON TRADING COMPANY LTD. AND SUBSIDIARIES CONSOLIDATED STATEMENTS OF CASH FLOWS','CONDENSED STATEMENTS OF CASH FLOWS','Consolidated Statement of Cash Flows Statement','Consolidated Statements of Cash Flows Statement','CONSOLIDATED STATEMENTS OF CASH FLOWS (Unaudited)','Carnival Corporation & PLC Consolidated Statements of Cash Flows','Consolidated Statement of Cash Flows (Statement)','CONSOLIDATED AND COMBINED STATEMENTS OF CASH FLOWS','Statements Of Condensed Consolidated Cash Flows','CONSOLIDATED STATEMENT OF CASH FLOWS (Unaudited)','CONSOLIDATED STATEMENTS OF CASH FLOWS (unaudited)','UTAH MEDICAL PRODUCTS, INC. CONSOLIDATED CONDENSED STATEMENT OF CASH FLOW','Consolidated Cash Flow Statements','CONSOLIDATED STATEMENT OF CASH FLOWS (in thousands)','CONSOLIDATED CASH FLOW STATEMENTS','CONSOLIDATED CASH FLOWS','CONDENSED CONSOLIDATED STATEMENTS OF CASH FLOWS','Consolidated Statements Of Capitalization','Statements of Consolidated Cash Flows','Consolidated Cash Flow Statement','Condensed Consolidated Statement of Cash Flows','CONSOLIDATED STATEMENTS OF CASHFLOWS (Audited)','Consolidated Statements of Cash Flows (Unaudited)','Statement of Cash Flows','CONSOLIDATED STATEMENTS of CASH FLOWS','Condensed Consolidated Statements of Cash Flows (Unaudited)','STATEMENT OF CASH FLOWS','CONSOLIDATED STATEMENTS OF CASH FLOWS (Audited)','Consolidated Statements of Cash Flow','CONSOLIDATED STATEMENTS OF CASH FLOW','STATEMENTS OF CONSOLIDATED CASH FLOWS','Statements Of Cash Flows','STATEMENTS OF CASH FLOWS','CASH FLOWS STATEMENTS','Consolidated statement of cash flows','Consolidated Statement Of Cash Flows','Consolidated Statement of Cash Flow','CONSOLIDATED STATEMENT OF CASH FLOWS','Condensed Statements of Cash Flows','Statements Of Consolidated Cash Flows','STATEMENT OF CONSOLIDATED CASH FLOWS','Statement of Consolidated Cash Flows','Consolidated Statements Of Cash Flows','Condensed Consolidated Statements of Cash Flows','Statements of Cash Flows','CONSOLIDATED STATEMENTS OF CASH FLOWS','Consolidated Statements of Cash Flows','Consolidated Statement of Cash Flows']:
                item4 = (r"{}").format(short)
        if item4 == 'Statements of cash flows' and 'Consolidated Statements Of Cash Flows':
            item4 = 'Consolidated Statements Of Cash Flows'
        if item3 == 'Statements of financial position' and item1 == 'Statements of income': # This conditional pulls the CF statement for LINX.
            item4 = 'Statements of cash flows'

        report_list = [item3, item4, item1, item2]
        if item1 and item2 in report_list:
            del report_list[3]
    #     elif item2 in report_list:
    #         report_list = [item3, item4, item2]
        else: 
            item2 in report_list
            report_list = [item3, item4, item2]
        if report_dict['name_short'] in report_list:

    #         print('-'*100)
    #         print(report_dict['name_short'])
    #         print(report_dict['url'])

            statement_url.append(report_dict['url'])
            state.append(report_dict['name_short'])


    state_ = [x.replace(item3,'Balance Sheet').replace(item4,'Cash Flow').replace(item1 or item2,'Income Statement') for x in state]
    list1 = statement_url
    list2 = state_

    zipped_lists = zip(list2, list1)

    sorted_zipped_lists = sorted(zipped_lists)
    sorted_list1 = [element for _, element in sorted_zipped_lists]

    statements_data = []

    for statement in sorted_list1:

        statement_data = {}
        statement_data['headers'] = []
        statement_data['sections'] = []
        statement_data['data'] = []

        content = requests.get(statement).content
        report_soup = BeautifulSoup(content, 'html')

        for index, row in enumerate(report_soup.table.find_all('tr')):

            columns = row.find_all('td')

            if(len(row.find_all('th')) == 0 and len(row.find_all('strong')) == 0):

                regular_row = [element.text.strip() for element in columns]
                statement_data['data'].append(regular_row)

            elif(len(row.find_all('th')) == 0 and len(row.find_all('strong')) != 0):

                section_row = columns[0].text.strip()
                statement_data['sections'].append(section_row)

            elif(len(row.find_all('th')) != 0):

                header_row = [element.text.strip() for element in row.find_all('th')]
                statement_data['headers'].append(header_row)

            else:
                 print('There is an error!')

        statements_data.append(statement_data)
else:
    print('This company is not supported on our platform or is not in our publicly traded list.')
    

# ORGANIZING AND CLEANING BALANCE SHEET - AUTOMATED 
financial_statement = []
def Number_of_lists(anything): 
    return len(anything)
count_2_bs = ''
count_1_bs = ''
count = Number_of_lists(statements_data[0]['headers'])
if count == 2:
    x = statements_data[0]['headers']
    count_2_bs = sum(x, [])
    if count_2_bs[1] == '12 Months Ended':
        del[count_2_bs[1]]
#     print('count 2: ', count_2_bs)
else:
    if count == 1:
        count_1_bs = statements_data[0]['headers'][0]
#     print('count 1: ', count_1_bs)
balance_sheet_headers = count_2_bs or count_1_bs
# print('Before:',balance_sheet_headers)
drop_1_headers_bs = []
string_bs = ['[1]','[2]','[3]','[4]','[1],[2]']
for i in range(0, len(balance_sheet_headers)) : 
    if balance_sheet_headers[i] in string_bs : 
        drop_1_headers_bs.append(i) 
new_headers_bs = [j for i, j in enumerate(balance_sheet_headers) if i not in drop_1_headers_bs]
# print('After:',new_headers_bs)
balance_sheet_headers = [n.replace('\n','| ') for n in new_headers_bs]

balance_sheet_data = statements_data[0]['data']
example_bs = balance_sheet_data[0:]
balance_sheet_data_ = examples_bs = [[x.replace('[1]','').replace('[2]', '').replace('[3]', '').replace('[4]', '') for x in i] for i in example_bs]
# balance_sheet_data_ = examples_bs
balance_sheet_df = pd.DataFrame(balance_sheet_data_)

balance_sheet_df.index = balance_sheet_df[0] # Make index like column zero 
# balance_sheet_df.index.name = balance_sheet_headers[0].replace(' ', '')
balance_sheet_df.index.name = balance_sheet_headers[0].split('-')[1].replace(' ', '').replace('(', '').replace(')', '').replace(',', '').replace('$', '')
balance_sheet_df = balance_sheet_df.drop(0, axis = 1)

balance_sheet_df = balance_sheet_df.replace('[/$,R,¥,£,₩,₽,CAD,€,₱,₨,SFr,₪)]', '', regex = True)\
                     .replace('[(]','-', regex = True)\
                     .replace('', np.nan, regex = True)\
                     .replace('[/None)]', np.nan, regex = True)

array_balance_sheet = np.array(balance_sheet_headers[1:]).size   # Count the number of elements for list
# print(array_balance_sheet)
sum_NaNs_balance_sheet = (balance_sheet_df.isnull().sum())   # Find sum of all null values for each column
# print(sum_NaNs_balance_sheet)
# print(sum_NaNs_balance_sheet.size)
if sum_NaNs_balance_sheet.size == array_balance_sheet:
    balance_sheet_df = balance_sheet_df.astype(float)
    balance_sheet_df.columns = balance_sheet_headers[1:]
    balance_sheet_df.columns = balance_sheet_df.columns.str.replace(',', '_').str.replace('.', '_').str.replace(' ', '') #######
balance_sheet_df

if sum_NaNs_balance_sheet.size != array_balance_sheet: #and sum_NaNs_balance_sheet.size > array_balance_sheet:       # If the number of elements do not match, subtract the difference.
    how_many_nums_to_delete_balance_sheet = sum_NaNs_balance_sheet.size - (sum_NaNs_balance_sheet.size - array_balance_sheet)
#     print(how_many_nums_to_delete_balance_sheet)
    p_l_balance_sheet = list(sum_NaNs_balance_sheet)                 # Convert sum of all null values into a list and sort lowest value to
    p_l_balance_sheet.sort(reverse=False)                 # to highest value.
    # print(p_l_balance_sheet[0:how_many_nums_to_delete_balance_sheet])  # Then, slice it to the number equal to array_income.

    max_new_list_balance_sheet = max(p_l_balance_sheet[0:how_many_nums_to_delete_balance_sheet]) # Find max of sliced list
    bs_df = balance_sheet_df.loc[:, (balance_sheet_df.isnull().sum(axis=0) <= max_new_list_balance_sheet)] #Delete columns with a certain amount of null values.

    bs_df = bs_df.rename(index={'': 'Empty'})
    bool_value = bs_df.index == 'Empty'
    index_number = np.where(bool_value)[0]
    int_value = int(index_number[0:1])

    bs_df[0:int_value]
    balance_sheet_df = bs_df[0:int_value].astype(float)
    balance_sheet_df.columns = balance_sheet_headers[1:]
    balance_sheet_df.columns = balance_sheet_df.columns.str.replace(',', '_').str.replace('.', '_').str.replace(' ', '') #######
balance_sheet_df
    
    
    
# ORGANIZING AND CLEANING CASH FLOW STATEMENT - AUTOMATED 
cf_headers = statements_data[1]['headers'][1]
# print('Before:',cf_headers)
drop_1_headers_cf = []
string_cf = ['[1]','[2]','[3]','[4]','[1],[2]']
for i in range(0, len(cf_headers)) : 
    if cf_headers[i] in string_cf : 
        drop_1_headers_cf.append(i) 
new_headers_cf = [j for i, j in enumerate(cf_headers) if i not in drop_1_headers_cf]
# print('After:',new_headers_cf)
cf_headers = [n.replace('\n','| ') for n in new_headers_cf]

cf_main_header = statements_data[1]['headers'][0]

cf_data = statements_data[1]['data']
example_cf = cf_data[0:]
cf_data_ = examples_cf = [[x.replace('[1]','').replace('[2]', '').replace('[3]', '').replace('[4]', '') for x in i] for i in example_cf]
# cf_data_ = examples_cf
cf_df = pd.DataFrame(cf_data_)

cf_df.index = cf_df[0] # Make index like column zero 
# cf_df.index.name = cf_main_header[0]
cf_df.index.name = cf_main_header[0].split('-')[1].replace(' ', '').replace('(', '').replace(')', '').replace(',', '').replace('$', '')
cf_df = cf_df.drop(0, axis = 1)

cf_df = cf_df.replace('[/$,R,¥,£,₩,₽,CAD,€,₱,₨,SFr,₪)]', '', regex = True)\
                     .replace('[(]','-', regex = True)\
                     .replace('', np.nan, regex = True)\
                     .replace('[/None)]', np.nan, regex = True)

array_cf = np.array(cf_headers).size   # Count the number of elements for list
# print(array_cf)
sum_NaNs_cf = (cf_df.isnull().sum())   # Find sum of all null values for each column
# print(sum_NaNs_cf.size)
if sum_NaNs_cf.size == array_cf:
    cf_df = cf_df.astype(float)
    cf_df.columns = cf_headers
    cf_df.columns = cf_df.columns.str.replace(',', '_').str.replace('.', '_').str.replace(' ', '') #######
cf_df
if sum_NaNs_cf.size != array_cf: # and sum_NaNs_cf.size > array_cf:       # If the number of elements do not match, subtract the difference.
    how_many_nums_to_delete_cf = sum_NaNs_cf.size - (sum_NaNs_cf.size - array_cf)
# print(how_many_nums_to_delete_cf)
    p_l_cf = list(sum_NaNs_cf)                 # Convert sum of all null values into a list and sort lowest value to
    p_l_cf.sort(reverse=False)                 # to highest value.
    # print(p_l_cf[0:how_many_nums_to_delete_cf])  # Then, slice it to the number equal to array_income.

    max_new_list_cf = max(p_l_cf[0:how_many_nums_to_delete_cf]) # Find max of sliced list
    cf_df_ = cf_df.loc[:, (cf_df.isnull().sum(axis=0) <= max_new_list_cf)] #Delete columns with a certain amount of null values.

    cf_df_ =cf_df_.rename(index={'': 'Empty'})
    bool_value = cf_df_.index == 'Empty'
    index_number = np.where(bool_value)[0]
    int_value = int(index_number[0:1])

    cf_df_[0:int_value]
    cf_df = cf_df_[0:int_value].astype(float)
    cf_df.columns = cf_headers
    cf_df.columns = cf_df.columns.str.replace(',', '_').str.replace('.', '_').str.replace(' ', '') #######
cf_df



# ORGANIZING AND CLEANING INCOME STATEMENT - AUTOMATED 
income_headers = statements_data[2]['headers'][1]
# print('Before:',income_headers)
drop_1_headers_i = []
string_i = ['[1]','[2]','[3]','[4]','[1],[2]']
for i in range(0, len(income_headers)) : 
    if income_headers[i] in string_i : 
        drop_1_headers_i.append(i) 
new_headers_i = [j for i, j in enumerate(income_headers) if i not in drop_1_headers_i]
# print('After:',new_headers_i)
income_headers = [n.replace('\n','| ') for n in new_headers_i]

income_main_header = statements_data[2]['headers'][0]

income_data = statements_data[2]['data']
example_i = income_data[0:]
income_data_ = examples_i = [[x.replace('[1]','').replace('[2]', '').replace('[3]', '').replace('[4]', '') for x in i] for i in example_i]
income_data_ = examples_i
income_df = pd.DataFrame(income_data_)

income_df.index

income_df.index = income_df[0] # Make index like column zero 
income_df.index.name = income_main_header[0] # Pulls name of statement, currency, and number notation.
income_df.index.name = income_main_header[0].split('-')[1].replace(' ', '').replace('(', '').replace(')', '').replace(',', '').replace('$', '')
income_df = income_df.drop(0, axis = 1)

income_df = income_df.replace('[/$,R,¥,£,₩,₽,CAD,€,₱,₨,SFr,₪)]', '', regex = True)\
                     .replace('[(]','-', regex = True)\
                     .replace('', 'NaN', regex = True)\
                     .replace('[/None)]', np.nan, regex = True)

array_income = np.array(income_headers).size   # Count the number of elements for list
# print(array_income)
sum_NaNs_income = (income_df.isnull().sum())   # Find sum of all null values for each column
# print(sum_NaNs_income)
# print(sum_NaNs_income.size)
if sum_NaNs_income.size == array_income:
    income_df = income_df.astype(float)
    income_df.columns = income_headers
    income_df.columns = cf_df.columns.str.replace(',', '_').str.replace('.', '_').str.replace(' ', '') #######
income_df
if sum_NaNs_income.size != array_income: #and sum_NaNs_income.size > array_income:       # If the number of elements do not match, subtract the difference.     
    how_many_nums_to_delete_income = sum_NaNs_income.size - (sum_NaNs_income.size - array_income)
# print(type(how_many_nums_to_delete_income))
    p_l_income = list(sum_NaNs_income)             # Convert sum of all null values into a list and sort lowest value to
    p_l_income.sort(reverse=False)                 # to highest value.
    p_l_income[0:how_many_nums_to_delete_income]   # Then, slice it to the number equal to array_income.

    max_new_list_income = max(p_l_income[0:how_many_nums_to_delete_income]) # Find max of sliced list
    income_df_ = income_df.loc[:, (income_df.isnull().sum(axis=0) <= max_new_list_income)] #Delete columns with a certain amount of null values.

    income_df_ =income_df_.rename(index={'': 'Empty'}) #File blank index ('') with the word ('Empty')
    bool_values = income_df_.index == 'Empty'    # Use boolean: When index equals ('Empty'): print 'True'
    index_numbers = np.where(bool_values)[0]    
    int_values = int(index_numbers[0:1])              # Find index value in an integer term.

    income_df_[0:int_values]            # Keep all rows up to, but not including the index value equaled to ('Empty').
    income_df = income_df_[0:int_values].astype(float) # Convert to a float
    income_df.columns = income_headers               # Substitute the headers list with the dates.
    income_df.columns = cf_df.columns.str.replace(',', '_').str.replace('.', '_').str.replace(' ', '') #######
income_df



# ALL THREE FINANCIAL STATEMENTS WILL BE STORED IN MYSQL. PYTHON IS CONNECTED TO MYSQL BELOW 
# SCHEMAS, TABLES, COLUMNS, TYPES, NULLABLES, AND ANY OTHER INFORMATION ADDED AND AUTOMATED WITH THE CODE BELOW.

import mysql.connector

mydb = mysql.connector.connect(
  host="localhost",
  user="root",
  password="YOUR_OWN_PASSWORD"
)

mycursor = mydb.cursor()

mycursor.execute("CREATE DATABASE {}".format(ticker))

mydb = mysql.connector.connect(
  host="localhost",
  user="root",
  password="YOUR_OWN_PASSWORD_HERE",
  database= str(ticker)
)

mycursor = mydb.cursor()
# cik_id = cik_num 
index_name_bs = balance_sheet_df.index.name
column_zero_bs = balance_sheet_df.columns[0]
column_one_bs = balance_sheet_df.columns[1]
year_posted = int(column_zero_bs[-2:])
table_bs = ticker + '_BS_' + str(year_posted) 

mycursor.execute('CREATE TABLE {} (ticker INT NOT NULL AUTO_INCREMENT PRIMARY KEY, {} VARCHAR(600), {} INT, {} INT)'.format(table_bs, index_name_bs, column_zero_bs, column_one_bs))

engine = sqlalchemy.create_engine('mysql+pymysql://root:PASSWORD@localhost:3306/{}'.format(ticker))
balance_sheet_df.to_sql(name = table_bs ,con = engine, if_exists = 'append')

table_cf = ticker + '_CF_' + str(year_posted)
index_name_cf = cf_df.index.name
column_zero_cf = cf_df.columns[0]
column_one_cf = cf_df.columns[1]
column_two_cf = cf_df.columns[2]

mycursor.execute('CREATE TABLE {} (ticker INT NOT NULL AUTO_INCREMENT PRIMARY KEY, {} VARCHAR(600), {} INT, {} INT, {} INT)'.format(table_cf, index_name_cf, column_zero_cf, column_one_cf, column_two_cf))

engine = sqlalchemy.create_engine('mysql+pymysql://root:PASSWORD@localhost:3306/{}'.format(ticker))
cf_df.to_sql(name = table_cf ,con = engine, if_exists = 'append')

table_ic = ticker + '_IC_' + str(year_posted)
index_name_ic = income_df.index.name
column_zero_ic = income_df.columns[0]
column_one_ic = income_df.columns[1]
column_two_ic = income_df.columns[2]

mycursor.execute('CREATE TABLE {} (ticker INT NOT NULL AUTO_INCREMENT PRIMARY KEY, {} VARCHAR(600), {} INT, {} INT, {} INT)'.format(table_ic, index_name_ic, column_zero_ic, column_one_ic, column_two_ic))

engine = sqlalchemy.create_engine('mysql+pymysql://root:PASSWORD@localhost:3306/{}'.format(ticker))
income_df.to_sql(name = table_ic ,con = engine, if_exists = 'append')

IndexError: list index out of range