In [597]:
# Importing built-in libraries (no need to install these)
import sys
import re
import os
from time import gmtime, strftime
from datetime import datetime, timedelta
import unicodedata

# Importing libraries you need to install
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import requests
import bs4 as bs
from lxml import html
from tqdm import tqdm
import glob
import shutil
import re
from dateutil.parser import parse
from datetime import datetime, timedelta
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
from bs4 import NavigableString


In [599]:
original_directory = "/Users/andrewwang/MyDocuments/10Q_Scraping"
os.chdir(original_directory)


with open("SP500_Tickers.csv") as f:
    tickers = [row.split()[0] for row in f]

In [600]:
def MapTickerToCik(tickers):
    url = 'http://www.sec.gov/cgi-bin/browse-edgar?CIK={}&Find=Search&owner=exclude&action=getcompany'
    cik_re = re.compile(r'.*CIK=(\d{10}).*')

    cik_dict = {}
    for ticker in tqdm(tickers): # Use tqdm lib for progress bar
        results = cik_re.findall(requests.get(url.format(ticker)).text)
        if len(results):
            cik_dict[str(ticker).lower()] = str(results[0])
    
    return cik_dict

In [601]:
cik_dict = MapTickerToCik(tickers)

100%|██████████| 505/505 [01:21<00:00,  6.17it/s]


In [602]:
cik_dict

{'a': '0001090872',
 'aal': '0000006201',
 'aap': '0001158449',
 'aapl': '0000320193',
 'abbv': '0001551152',
 'abc': '0001140859',
 'abmd': '0000815094',
 'abt': '0000001800',
 'acn': '0001467373',
 'adbe': '0000796343',
 'adi': '0000006281',
 'adm': '0000007084',
 'adp': '0000008670',
 'ads': '0001101215',
 'adsk': '0000769397',
 'aee': '0001002910',
 'aep': '0000004904',
 'aes': '0000874761',
 'afl': '0000004977',
 'aig': '0000005272',
 'aiv': '0000922864',
 'aiz': '0001267238',
 'ajg': '0000354190',
 'akam': '0001086222',
 'alb': '0000915913',
 'algn': '0001097149',
 'alk': '0000766421',
 'all': '0000899051',
 'alle': '0001579241',
 'alxn': '0000899866',
 'amat': '0000006951',
 'amcr': '0001748790',
 'amd': '0000002488',
 'ame': '0001037868',
 'amgn': '0000318154',
 'amp': '0000820027',
 'amt': '0001053507',
 'amzn': '0001018724',
 'anet': '0001596532',
 'anss': '0001013462',
 'antm': '0001156039',
 'aon': '0000315293',
 'aos': '0000091142',
 'apa': '0000006769',
 'apd': '000000296

In [603]:
# Clean up the ticker-CIK mapping as a DataFrame
ticker_cik_df = pd.DataFrame.from_dict(data=cik_dict, orient='index')
ticker_cik_df.reset_index(inplace=True)
ticker_cik_df.columns = ['ticker', 'cik']
ticker_cik_df['cik'] = [str(cik) for cik in ticker_cik_df['cik']]
ticker_cik_df = ticker_cik_df.set_index('ticker')
ticker_cik_df["10-K Filing Date"] = ""
ticker_cik_df["10-K Period"] = ""
ticker_cik_df["10-Q Filing Date"] = ""
ticker_cik_df["10-Q Period"] = ""
ticker_cik_df

Unnamed: 0_level_0,cik,10-K Filing Date,10-K Period,10-Q Filing Date,10-Q Period
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
mmm,0000066740,,,,
abt,0000001800,,,,
abbv,0001551152,,,,
abmd,0000815094,,,,
acn,0001467373,,,,
atvi,0000718877,,,,
adbe,0000796343,,,,
amd,0000002488,,,,
aap,0001158449,,,,
aes,0000874761,,,,


In [604]:
def WriteLogFile(log_file_name, text):
    
    '''
    Helper function.
    Writes a log file with all notes and
    error messages from a scraping "session".
    
    Parameters
    ----------
    log_file_name : str
        Name of the log file (should be a .txt file).
    text : str
        Text to write to the log file.
        
    Returns
    -------
    None.
    
    '''
    
    with open(log_file_name, "a") as log_file:
        log_file.write(text)

    return

In [605]:
def ScrapeDocument(ticker, browse_url_base, filing_url_base, doc_url_base, cik, log_file_name, is10K):
    
    '''
    Scrapes all 10-Ks and 10-K405s for a particular 
    CIK from EDGAR.
    
    Parameters
    ----------
    browse_url_base : str
        Base URL for browsing EDGAR.
    filing_url_base : str
        Base URL for filings listings on EDGAR.
    doc_url_base : str
        Base URL for one filing's document tables
        page on EDGAR.
    cik : str
        Central Index Key.
    log_file_name : str
        Name of the log file (should be a .txt file).
        
    Returns
    -------
    None.
    
    '''
    
    # Check if we've already scraped this CIK
    try:
        os.mkdir(cik)
    except OSError:
        print("Already scraped CIK", cik)
        return
    
    # If we haven't, go into the directory for that CIK
    os.chdir(cik)
    
    print('Scraping CIK', cik)
    
    # Request list of 10-K filings
    res = requests.get(browse_url_base.format(cik))
    
    # If the request failed, log the failure and exit
    if res.status_code != 200:
        os.chdir('..')
        os.rmdir(cik) # remove empty dir
        text = "Request failed with error code " + str(res.status_code) + \
               "\nFailed URL: " + (browse_url_base.format(cik)) + '\n'
        WriteLogFile(log_file_name, text)
        return

    # If the request doesn't fail, continue...
    
    # Parse the response HTML using BeautifulSoup
    soup = bs.BeautifulSoup(res.text, "lxml")

    # Extract all tables from the response
    html_tables = soup.find_all('table')
    
    # Check that the table we're looking for exists
    # If it doesn't, exit
    if len(html_tables)<3:
        os.chdir('..')
        return
    
    # Parse the Filings table
    filings_table = pd.read_html(str(html_tables[2]), header=0)[0]
    filings_table['Filings'] = [str(x) for x in filings_table['Filings']]

    # Get only 10-K and 10-K405 document filings
    if is10K:
        filings_table = filings_table[(filings_table['Filings'] == '10-K') | (filings_table['Filings'] == '10-K405')]
    else:
        filings_table = filings_table[(filings_table['Filings'] == '10-Q')]
        
    # If filings table doesn't have any
    # 10-Ks or 10-K405s, exit
    if len(filings_table)==0:
        os.chdir('..')
        return
    
    # Get accession number for each 10-K and 10-K405 filing
    filings_table['Acc_No'] = [x.replace('\xa0',' ')
                               .split('Acc-no: ')[1]
                               .split(' ')[0] for x in filings_table['Description']]

    # Iterate through each filing and 
    # scrape the corresponding document...
    for index, row in filings_table.iterrows():
        
        # Get the accession number for the filing
        acc_no = str(row['Acc_No'])
        # print(filing_url_base.format(cik, acc_no))
        
        # Navigate to the page for the filing
        docs_page = requests.get(filing_url_base.format(cik, acc_no))
        
        # If request fails, log the failure
        # and skip to the next filing
        if docs_page.status_code != 200:
            os.chdir('..')
            text = "Request failed with error code " + str(docs_page.status_code) + \
                   "\nFailed URL: " + (filing_url_base.format(cik, acc_no)) + '\n'
            WriteLogFile(log_file_name, text)
            os.chdir(cik)
            continue

        # If request succeeds, keep going...
        
        # Parse the table of documents for the filing
        docs_page_soup = bs.BeautifulSoup(docs_page.text, 'lxml')
        
        filing_date_div = docs_page_soup.find(text=re.compile("Filing (D|d)ate")).parent
        filing_date = filing_date_div.findNext('div').get_text()
        period_of_report_div = docs_page_soup.find(text=re.compile("Period (O|o)f (R|r)eport")).parent
        period_of_report_date = period_of_report_div.findNext('div').get_text()
        
        if is10K:
            ticker_cik_df['10-K Filing Date'] = filing_date
            ticker_cik_df['10-K Period'] = period_of_report_date
        else:
            ticker_cik_df['10-Q Filing Date'] = filing_date
            ticker_cik_df['10-Q Period'] = period_of_report_date
        
        docs_html_tables = docs_page_soup.find_all('table')
        if len(docs_html_tables)==0:
            continue
        docs_table = pd.read_html(str(docs_html_tables[0]), header=0)[0]
        docs_table['Type'] = [str(x) for x in docs_table['Type']]
        
        # Get the 10-K and 10-K405 entries for the filing
        if is10K:
            docs_table = docs_table[(docs_table['Type'] == '10-K') | (docs_table['Type'] == '10-K405')]
        else:
            docs_table = docs_table[(docs_table['Type'] == '10-Q')]
        # If there aren't any 10-K or 10-K405 entries,
        # skip to the next filing
        if len(docs_table)==0:
            continue
        # If there are 10-K or 10-K405 entries,
        # grab the first document
        elif len(docs_table)>0:
            docs_table = docs_table.iloc[0]
        
        docname = docs_table['Document']
        
        # If that first entry is unavailable,
        # log the failure and exit
        if str(docname) == 'nan':
            os.chdir('..')
            text = 'File with CIK: {} and Acc_No: {} is unavailable'.format(cik, acc_no) + '\n'
            WriteLogFile(log_file_name, text)
            os.chdir(cik)
            continue       
        
        # If it is available, continue...
        docname = docname.split()[0]
        # Request the file
        file = requests.get(doc_url_base.format(cik, acc_no.replace('-', ''), docname))
        
        # If the request fails, log the failure and exit
        if file.status_code != 200:
            raise Exception("Fuck")
            os.chdir('..')
            text = "Request failed with error code " + str(file.status_code) + \
                   "\nFailed URL: " + (doc_url_base.format(cik, acc_no.replace('-', ''), docname)) + '\n'
            WriteLogFile(log_file_name, text)
            os.chdir(cik)
            continue
        
        # If it succeeds, keep going...
        
        # Save the file in appropriate format
        if '.txt' in docname:
            # Save text as TXT
            date = str(row['Filing Date'])
            filename = cik + '_' + date + '.txt'
            html_file = open(filename, 'a')
            html_file.write(file.text)
            html_file.close()
        else:
            # Save text as HTML
            date = str(row['Filing Date'])
            filename = cik + '_' + date + '.html'
            html_file = open(filename, 'a')
            html_file.write(file.text)
            html_file.close()
            
        break
        
    # Move back to the main 10-K directory
    os.chdir('..')
        
    return

In [606]:
def delete_contents(foldername):
    for root, dirs, files in os.walk(foldername):
        for f in files:
            os.unlink(os.path.join(root, f))
        for d in dirs:
            shutil.rmtree(os.path.join(root, d))
        
pathname_10k = original_directory + '/10_K_Docs'
pathname_10q = original_directory + '/10_Q_Docs'

delete_contents(pathname_10k)
delete_contents(pathname_10q)

In [607]:
os.chdir(original_directory)
print(os.getcwd())
# Run the function to scrape 10-K
# Define parameters
browse_url_base_10k = 'https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={}&type=10-K'
filing_url_base_10k = 'http://www.sec.gov/Archives/edgar/data/{}/{}-index.html'
doc_url_base_10k = 'http://www.sec.gov/Archives/edgar/data/{}/{}/{}'

# Set correct directory
os.chdir(pathname_10k)

# Initialize log file
# (log file name = the time we initiate scraping session)
time = strftime("%Y-%m-%d %Hh%Mm%Ss", gmtime())
log_file_name = 'log '+time+'.txt'
with open(log_file_name, 'a') as log_file:
    log_file.close()

# Iterate over CIKs and scrape 10-Ks
for ticker,row in tqdm(ticker_cik_df.iterrows()):
    ScrapeDocument(ticker=ticker,
                   browse_url_base=browse_url_base_10k, 
                   filing_url_base=filing_url_base_10k, 
                   doc_url_base=doc_url_base_10k, 
                   cik=row['cik'],
                   log_file_name=log_file_name,
                   is10K = True)
os.chdir('..')

0it [00:00, ?it/s]

/Users/andrewwang/MyDocuments/10Q_Scraping
Scraping CIK 0000066740


1it [00:00,  1.02it/s]

Scraping CIK 0000001800


2it [00:01,  1.08it/s]

Scraping CIK 0001551152


3it [00:02,  1.10it/s]

Scraping CIK 0000815094


4it [00:03,  1.10it/s]

Scraping CIK 0001467373


5it [00:04,  1.10it/s]

Scraping CIK 0000718877


6it [00:05,  1.14it/s]

Scraping CIK 0000796343


7it [00:06,  1.17it/s]

Scraping CIK 0000002488


8it [00:06,  1.15it/s]

Scraping CIK 0001158449


9it [00:07,  1.16it/s]

Scraping CIK 0000874761


10it [00:08,  1.10it/s]

Scraping CIK 0000004977


11it [00:09,  1.13it/s]

Scraping CIK 0001090872


12it [00:10,  1.14it/s]

Scraping CIK 0000002969


13it [00:11,  1.12it/s]

Scraping CIK 0001086222


14it [00:12,  1.12it/s]

Scraping CIK 0000766421


15it [00:13,  1.09it/s]

Scraping CIK 0000915913


16it [00:14,  1.08it/s]

Scraping CIK 0001035443


17it [00:15,  1.06it/s]

Scraping CIK 0000899866


18it [00:16,  1.07it/s]

Scraping CIK 0001097149


19it [00:16,  1.10it/s]

Scraping CIK 0001579241


20it [00:17,  1.14it/s]

Scraping CIK 0001101215


21it [00:18,  1.12it/s]

Scraping CIK 0000352541


22it [00:19,  1.09it/s]

Scraping CIK 0000899051


23it [00:20,  1.08it/s]

Scraping CIK 0001652044


24it [00:21,  1.18it/s]

Already scraped CIK 0001652044
Scraping CIK 0000764180


26it [00:22,  1.37it/s]

Scraping CIK 0001018724


27it [00:23,  1.22it/s]

Scraping CIK 0001748790


28it [00:24,  1.13it/s]

Scraping CIK 0001002910


29it [00:25,  1.02s/it]

Scraping CIK 0000006201


30it [00:26,  1.04s/it]

Scraping CIK 0000004904


31it [00:27,  1.06s/it]

Scraping CIK 0000004962


32it [00:29,  1.15s/it]

Scraping CIK 0000005272


33it [00:31,  1.38s/it]

Scraping CIK 0001053507


34it [00:32,  1.27s/it]

Scraping CIK 0001410636


35it [00:33,  1.18s/it]

Scraping CIK 0000820027


36it [00:34,  1.20s/it]

Scraping CIK 0001140859


37it [00:35,  1.15s/it]

Scraping CIK 0001037868


38it [00:36,  1.10s/it]

Scraping CIK 0000318154


39it [00:37,  1.06s/it]

Scraping CIK 0000820313


40it [00:38,  1.03s/it]

Scraping CIK 0000006281


41it [00:39,  1.01it/s]

Scraping CIK 0001013462


42it [00:40,  1.00s/it]

Scraping CIK 0001156039


43it [00:41,  1.03it/s]

Scraping CIK 0000315293


44it [00:42,  1.02it/s]

Scraping CIK 0000091142


45it [00:43,  1.03it/s]

Scraping CIK 0000006769


46it [00:44,  1.02s/it]

Scraping CIK 0000922864


47it [00:45,  1.04s/it]

Scraping CIK 0000320193


48it [00:46,  1.03it/s]

Scraping CIK 0000006951


49it [00:47,  1.00it/s]

Scraping CIK 0001521332


50it [00:48,  1.01it/s]

Scraping CIK 0000007084


51it [00:49,  1.00s/it]

Scraping CIK 0001596532


52it [00:50,  1.04it/s]

Scraping CIK 0000354190


53it [00:51,  1.01it/s]

Scraping CIK 0001267238


54it [00:52,  1.02s/it]

Scraping CIK 0000732717


55it [00:53,  1.10s/it]

Scraping CIK 0000731802


56it [00:54,  1.10s/it]

Scraping CIK 0000769397


57it [00:55,  1.08s/it]

Scraping CIK 0000008670


58it [00:56,  1.04s/it]

Scraping CIK 0000866787


59it [00:57,  1.00it/s]

Scraping CIK 0000915912


60it [00:58,  1.01s/it]

Scraping CIK 0000008818


61it [00:59,  1.05it/s]

Scraping CIK 0001701605


62it [01:00,  1.09it/s]

Scraping CIK 0000009389


63it [01:01,  1.03it/s]

Scraping CIK 0000070858


64it [01:02,  1.14s/it]

Scraping CIK 0001390777


65it [01:03,  1.04s/it]

Scraping CIK 0000010456


66it [01:04,  1.09s/it]

Scraping CIK 0000010795


67it [01:05,  1.07s/it]

Scraping CIK 0000764478


68it [01:06,  1.07s/it]

Scraping CIK 0000875045


69it [01:08,  1.10s/it]

Scraping CIK 0001364742


70it [01:09,  1.10s/it]

Scraping CIK 0000012927


71it [01:10,  1.07s/it]

Scraping CIK 0001075531


72it [01:11,  1.04s/it]

Scraping CIK 0000908255


73it [01:12,  1.00s/it]

Scraping CIK 0001037540


74it [01:13,  1.08s/it]

Scraping CIK 0000885725


75it [01:14,  1.05s/it]

Scraping CIK 0000014272


76it [01:15,  1.03s/it]

Scraping CIK 0001730168


77it [01:16,  1.02s/it]

Scraping CIK 0001383312


78it [01:17,  1.03it/s]

Scraping CIK 0001043277


79it [01:17,  1.05it/s]

Scraping CIK 0000858470


80it [01:18,  1.05it/s]

Scraping CIK 0000813672


81it [01:20,  1.02it/s]

Scraping CIK 0000016732


82it [01:21,  1.00it/s]

Scraping CIK 0000927628


83it [01:22,  1.03s/it]

Scraping CIK 0000721371


84it [01:23,  1.02s/it]

Scraping CIK 0001170010


85it [01:24,  1.01it/s]

Scraping CIK 0000815097


87it [01:25,  1.41it/s]

Scraping CIK 0001783180
Scraping CIK 0000018230


88it [01:26,  1.16it/s]

Scraping CIK 0001374310


89it [01:27,  1.13it/s]

Scraping CIK 0001138118


90it [01:28,  1.02it/s]

Scraping CIK 0001402057


91it [01:29,  1.05it/s]

Scraping CIK 0001306830


92it [01:30,  1.02it/s]

Scraping CIK 0001071739


93it [01:31,  1.04it/s]

Scraping CIK 0001130310


94it [01:32,  1.02s/it]

Scraping CIK 0000018926


95it [01:33,  1.02it/s]

Scraping CIK 0000804753


96it [01:34,  1.06it/s]

Scraping CIK 0001324404


97it [01:35,  1.08it/s]

Scraping CIK 0000316709


98it [01:36,  1.03it/s]

Scraping CIK 0001091667


99it [01:37,  1.02s/it]

Scraping CIK 0000093410


100it [01:38,  1.04s/it]

Scraping CIK 0001058090


101it [01:39,  1.01it/s]

Scraping CIK 0000896159


102it [01:40,  1.13s/it]

Scraping CIK 0000313927


103it [01:41,  1.09s/it]

Scraping CIK 0001739940


104it [01:42,  1.05s/it]

Scraping CIK 0000020286


105it [01:43,  1.07s/it]

Scraping CIK 0000723254


106it [01:44,  1.07s/it]

Scraping CIK 0000858877


107it [01:45,  1.05s/it]

Scraping CIK 0000831001


108it [01:47,  1.17s/it]

Scraping CIK 0000759944


109it [01:48,  1.11s/it]

Scraping CIK 0000877890


110it [01:49,  1.04s/it]

Scraping CIK 0000021076


111it [01:50,  1.01it/s]

Scraping CIK 0001156375


112it [01:50,  1.05it/s]

Scraping CIK 0000811156


113it [01:52,  1.03s/it]

Scraping CIK 0000021344


114it [01:53,  1.05s/it]

Scraping CIK 0001058290


115it [01:54,  1.01s/it]

Scraping CIK 0000021665


116it [01:55,  1.00it/s]

Scraping CIK 0001166691


117it [01:56,  1.02s/it]

Scraping CIK 0000028412


118it [01:57,  1.02s/it]

Scraping CIK 0000023217


119it [01:58,  1.03s/it]

Scraping CIK 0001358071


120it [01:59,  1.04it/s]

Scraping CIK 0001163165


121it [02:00,  1.01s/it]

Scraping CIK 0001047862


122it [02:01,  1.08s/it]

Scraping CIK 0000016918


123it [02:02,  1.06s/it]

Scraping CIK 0000711404


124it [02:03,  1.01s/it]

Scraping CIK 0000900075


125it [02:04,  1.05it/s]

Scraping CIK 0000024741


126it [02:05,  1.06it/s]

Scraping CIK 0001755672


127it [02:05,  1.13it/s]

Scraping CIK 0000909832


128it [02:06,  1.16it/s]

Scraping CIK 0001024305


129it [02:07,  1.15it/s]

Scraping CIK 0001051470


130it [02:08,  1.17it/s]

Scraping CIK 0000277948


131it [02:09,  1.20it/s]

Scraping CIK 0000026172


132it [02:09,  1.17it/s]

Scraping CIK 0000064803


133it [02:10,  1.11it/s]

Scraping CIK 0000882184


134it [02:11,  1.11it/s]

Scraping CIK 0000313616


135it [02:12,  1.13it/s]

Scraping CIK 0000940944


136it [02:13,  1.12it/s]

Scraping CIK 0000927066


137it [02:14,  1.12it/s]

Scraping CIK 0000315189


138it [02:15,  1.12it/s]

Scraping CIK 0000027904


139it [02:16,  1.12it/s]

Scraping CIK 0000818479


140it [02:17,  1.15it/s]

Scraping CIK 0001090012


141it [02:18,  1.07it/s]

Scraping CIK 0001093557


142it [02:19,  1.09it/s]

Scraping CIK 0001539838


143it [02:19,  1.11it/s]

Scraping CIK 0001297996


144it [02:20,  1.09it/s]

Scraping CIK 0001393612


145it [02:21,  1.08it/s]

Scraping CIK 0001437107


146it [02:22,  1.14it/s]

Already scraped CIK 0001437107
Scraping CIK 0001001082


148it [02:23,  1.29it/s]

Scraping CIK 0000029534


149it [02:24,  1.14it/s]

Scraping CIK 0000935703


150it [02:26,  1.00it/s]

Scraping CIK 0000715957


151it [02:27,  1.02s/it]

Scraping CIK 0001286681


152it [02:28,  1.01s/it]

Scraping CIK 0000029905


153it [02:29,  1.01s/it]

Scraping CIK 0001751788


154it [02:29,  1.10it/s]

Scraping CIK 0000936340


155it [02:30,  1.09it/s]

Scraping CIK 0001326160


156it [02:32,  1.07s/it]

Scraping CIK 0000783280


157it [02:33,  1.09s/it]

Scraping CIK 0001666700


158it [02:34,  1.08s/it]

Scraping CIK 0001688568


159it [02:35,  1.06it/s]

Scraping CIK 0001015780


160it [02:36,  1.07s/it]

Scraping CIK 0000915389


161it [02:37,  1.02s/it]

Scraping CIK 0001551182


162it [02:38,  1.05it/s]

Scraping CIK 0001065088


163it [02:38,  1.06it/s]

Scraping CIK 0000031462


164it [02:39,  1.10it/s]

Scraping CIK 0000827052


165it [02:40,  1.04it/s]

Scraping CIK 0001099800


166it [02:41,  1.10it/s]

Scraping CIK 0000712515


167it [02:42,  1.06it/s]

Scraping CIK 0000032604


168it [02:43,  1.06it/s]

Scraping CIK 0000065984


169it [02:45,  1.09s/it]

Scraping CIK 0000821189


170it [02:46,  1.06s/it]

Scraping CIK 0000033185


171it [02:46,  1.01it/s]

Scraping CIK 0001101239


172it [02:48,  1.08s/it]

Scraping CIK 0000906107


173it [02:49,  1.09s/it]

Scraping CIK 0000920522


174it [02:50,  1.03s/it]

Scraping CIK 0001001250


175it [02:51,  1.03it/s]

Scraping CIK 0001711269


176it [02:51,  1.06it/s]

Scraping CIK 0000072741


177it [02:52,  1.01it/s]

Scraping CIK 0001095073


178it [02:54,  1.01s/it]

Scraping CIK 0001109357


179it [02:55,  1.03s/it]

Scraping CIK 0001324424


180it [02:55,  1.04it/s]

Scraping CIK 0000746515


181it [02:56,  1.11it/s]

Scraping CIK 0001289490


182it [02:57,  1.15it/s]

Scraping CIK 0000034088


183it [02:58,  1.13it/s]

Scraping CIK 0001048695


184it [02:59,  1.14it/s]

Scraping CIK 0001326801


185it [03:00,  1.16it/s]

Scraping CIK 0000815556


186it [03:00,  1.18it/s]

Scraping CIK 0000034903


187it [03:01,  1.15it/s]

Scraping CIK 0001048911


188it [03:02,  1.14it/s]

Scraping CIK 0001136893


189it [03:03,  1.15it/s]

Scraping CIK 0000035527


190it [03:04,  1.05it/s]

Scraping CIK 0001031296


192it [03:05,  1.40it/s]

Scraping CIK 0001132979
Scraping CIK 0000798354


193it [03:06,  1.32it/s]

Scraping CIK 0001175454


194it [03:07,  1.29it/s]

Scraping CIK 0000354908


195it [03:08,  1.27it/s]

Scraping CIK 0000030625


196it [03:09,  1.20it/s]

Scraping CIK 0000037785


197it [03:10,  1.17it/s]

Scraping CIK 0000037996


198it [03:11,  1.10it/s]

Scraping CIK 0001262039


199it [03:12,  1.07it/s]

Scraping CIK 0001659166


200it [03:12,  1.14it/s]

Scraping CIK 0001519751


201it [03:13,  1.18it/s]

Scraping CIK 0001754301


202it [03:14,  1.17it/s]

Already scraped CIK 0001754301
Scraping CIK 0000038777


204it [03:15,  1.36it/s]

Scraping CIK 0000831259


205it [03:16,  1.28it/s]

Scraping CIK 0000039911


206it [03:17,  1.25it/s]

Scraping CIK 0001121788


207it [03:18,  1.23it/s]

Scraping CIK 0000749251


208it [03:19,  1.16it/s]

Scraping CIK 0000040533


209it [03:20,  1.12it/s]

Scraping CIK 0000040545


210it [03:20,  1.10it/s]

Scraping CIK 0000040704


211it [03:21,  1.11it/s]

Scraping CIK 0001467858


212it [03:22,  1.15it/s]

Scraping CIK 0000040987


213it [03:23,  1.15it/s]

Scraping CIK 0000882095


214it [03:24,  1.12it/s]

Scraping CIK 0000320335


215it [03:25,  1.03it/s]

Scraping CIK 0001123360


216it [03:26,  1.01it/s]

Scraping CIK 0000886982


217it [03:27,  1.00it/s]

Scraping CIK 0000277135


218it [03:28,  1.00it/s]

Scraping CIK 0000012659


219it [03:29,  1.04it/s]

Scraping CIK 0000045012


220it [03:30,  1.03it/s]

Scraping CIK 0001359841


221it [03:31,  1.05it/s]

Scraping CIK 0000793952


222it [03:32,  1.05it/s]

Scraping CIK 0000874766


223it [03:33,  1.06s/it]

Scraping CIK 0000046080


224it [03:34,  1.01s/it]

Scraping CIK 0000860730


225it [03:35,  1.00it/s]

Scraping CIK 0000765880


226it [03:36,  1.01s/it]

Scraping CIK 0001000228


227it [03:37,  1.04s/it]

Scraping CIK 0000047111


228it [03:38,  1.00s/it]

Scraping CIK 0000004447


229it [03:39,  1.00s/it]

Scraping CIK 0001645590


230it [03:40,  1.07it/s]

Scraping CIK 0001585689


231it [03:41,  1.10it/s]

Scraping CIK 0000048039


232it [03:42,  1.10it/s]

Scraping CIK 0000859737


233it [03:43,  1.08it/s]

Scraping CIK 0000354950


234it [03:43,  1.10it/s]

Scraping CIK 0000773840


235it [03:44,  1.12it/s]

Scraping CIK 0000048465


236it [03:45,  1.10it/s]

Scraping CIK 0001070750


237it [03:46,  1.02it/s]

Scraping CIK 0000004281


238it [03:48,  1.01s/it]

Scraping CIK 0000047217


239it [03:49,  1.03s/it]

Scraping CIK 0000049071


240it [03:50,  1.02s/it]

Scraping CIK 0000049196


241it [03:51,  1.16s/it]

Scraping CIK 0001501585


242it [03:52,  1.06s/it]

Scraping CIK 0000832101


243it [03:53,  1.02s/it]

Scraping CIK 0000874716


244it [03:54,  1.02s/it]

Scraping CIK 0001598014


245it [03:55,  1.05it/s]

Scraping CIK 0000049826


246it [03:56,  1.01it/s]

Scraping CIK 0001110803


247it [03:57,  1.06it/s]

Scraping CIK 0000879169


248it [03:58,  1.05it/s]

Scraping CIK 0001699150


249it [03:58,  1.08it/s]

Scraping CIK 0000050863


250it [04:00,  1.01s/it]

Scraping CIK 0001571949


251it [04:00,  1.05it/s]

Scraping CIK 0000051143


252it [04:01,  1.05it/s]

Scraping CIK 0000051434


253it [04:03,  1.01s/it]

Scraping CIK 0000051644


254it [04:04,  1.04s/it]

Scraping CIK 0000051253


255it [04:05,  1.01s/it]

Scraping CIK 0000896878


256it [04:06,  1.02it/s]

Scraping CIK 0001035267


257it [04:07,  1.04s/it]

Scraping CIK 0000914208


258it [04:08,  1.17s/it]

Scraping CIK 0001111928


259it [04:09,  1.09s/it]

Scraping CIK 0001478242


260it [04:10,  1.06s/it]

Scraping CIK 0001020569


261it [04:11,  1.07s/it]

Scraping CIK 0000779152


262it [04:12,  1.06s/it]

Scraping CIK 0000052988


263it [04:14,  1.15s/it]

Scraping CIK 0000728535


264it [04:14,  1.09s/it]

Scraping CIK 0000091419


265it [04:16,  1.34s/it]

Scraping CIK 0000200406


266it [04:19,  1.70s/it]

Scraping CIK 0000833444


267it [04:21,  1.67s/it]

Scraping CIK 0000019617


268it [04:23,  1.84s/it]

Scraping CIK 0001043604


269it [04:24,  1.59s/it]

Scraping CIK 0000054480


270it [04:25,  1.40s/it]

Scraping CIK 0000055067


271it [04:26,  1.29s/it]

Scraping CIK 0000091576


272it [04:27,  1.25s/it]

Scraping CIK 0001601046


273it [04:28,  1.10s/it]

Scraping CIK 0000055785


274it [04:29,  1.10s/it]

Scraping CIK 0000879101


275it [04:30,  1.06s/it]

Scraping CIK 0001506307


276it [04:31,  1.13s/it]

Scraping CIK 0000319201


277it [04:32,  1.08s/it]

Scraping CIK 0000885639


278it [04:33,  1.03it/s]

Scraping CIK 0001637459


279it [04:34,  1.04it/s]

Scraping CIK 0000056873


280it [04:35,  1.03it/s]

Scraping CIK 0000701985


281it [04:36,  1.06it/s]

Scraping CIK 0000202058


282it [04:37,  1.04it/s]

Scraping CIK 0000920148


283it [04:38,  1.03it/s]

Scraping CIK 0000707549


284it [04:39,  1.00it/s]

Scraping CIK 0001679273


285it [04:39,  1.06it/s]

Scraping CIK 0001300514


286it [04:40,  1.03it/s]

Scraping CIK 0000058492


287it [04:41,  1.02it/s]

Scraping CIK 0001336920


288it [04:42,  1.01it/s]

Scraping CIK 0000920760


289it [04:43,  1.01it/s]

Scraping CIK 0000059478


290it [04:44,  1.05it/s]

Scraping CIK 0000059558


291it [04:45,  1.03it/s]

Scraping CIK 0001707925


292it [04:46,  1.12it/s]

Scraping CIK 0001335258


293it [04:47,  1.11it/s]

Scraping CIK 0001065696


294it [04:48,  1.08it/s]

Scraping CIK 0000936468


295it [04:49,  1.03it/s]

Scraping CIK 0000060086


296it [04:50,  1.03s/it]

Scraping CIK 0000060667


297it [04:51,  1.02s/it]

Scraping CIK 0001489393


298it [04:52,  1.06it/s]

Scraping CIK 0000036270


299it [04:53,  1.04it/s]

Scraping CIK 0000101778


300it [04:54,  1.08it/s]

Scraping CIK 0001510295


301it [04:55,  1.06it/s]

Scraping CIK 0001278021


302it [04:56,  1.06it/s]

Scraping CIK 0001048286


303it [04:57,  1.07it/s]

Scraping CIK 0000062709


304it [04:58,  1.07it/s]

Scraping CIK 0000916076


305it [04:59,  1.06it/s]

Scraping CIK 0000062996


306it [04:59,  1.07it/s]

Scraping CIK 0001141391


307it [05:00,  1.09it/s]

Scraping CIK 0000063754


308it [05:01,  1.12it/s]

Scraping CIK 0000743316


309it [05:02,  1.14it/s]

Scraping CIK 0000063908


310it [05:03,  1.06it/s]

Scraping CIK 0000927653


311it [05:04,  1.05it/s]

Scraping CIK 0001613103


312it [05:05,  1.12it/s]

Scraping CIK 0000310158


313it [05:06,  1.08it/s]

Scraping CIK 0001099219


314it [05:07,  1.05s/it]

Scraping CIK 0001037646


315it [05:08,  1.05s/it]

Scraping CIK 0000789570


316it [05:09,  1.11s/it]

Scraping CIK 0000827054


317it [05:10,  1.08s/it]

Scraping CIK 0000723125


318it [05:11,  1.04s/it]

Scraping CIK 0000789019


319it [05:12,  1.00it/s]

Scraping CIK 0000912595


320it [05:13,  1.04s/it]

Scraping CIK 0000851968


321it [05:15,  1.10s/it]

Scraping CIK 0000024545


322it [05:16,  1.20s/it]

Scraping CIK 0001103982


323it [05:17,  1.14s/it]

Scraping CIK 0000865752


324it [05:18,  1.16s/it]

Scraping CIK 0001059556


325it [05:19,  1.15s/it]

Scraping CIK 0000895421


326it [05:20,  1.11s/it]

Scraping CIK 0001285785


327it [05:21,  1.08s/it]

Scraping CIK 0000068505


328it [05:23,  1.08s/it]

Scraping CIK 0001408198


329it [05:24,  1.08s/it]

Scraping CIK 0001623613


330it [05:25,  1.02s/it]

Scraping CIK 0001120193


331it [05:25,  1.03it/s]

Scraping CIK 0001021860


332it [05:26,  1.06it/s]

Scraping CIK 0001002047


333it [05:27,  1.08it/s]

Scraping CIK 0001065280


334it [05:28,  1.13it/s]

Scraping CIK 0000814453


335it [05:29,  1.14it/s]

Scraping CIK 0001164727


336it [05:30,  1.06it/s]

Scraping CIK 0001564708


337it [05:31,  1.12it/s]

Already scraped CIK 0001564708
Scraping CIK 0000753308


339it [05:32,  1.28it/s]

Scraping CIK 0001492633


340it [05:33,  1.18it/s]

Scraping CIK 0000320187


341it [05:34,  1.12it/s]

Scraping CIK 0001111711


342it [05:35,  1.08it/s]

Scraping CIK 0000072207


343it [05:36,  1.03it/s]

Scraping CIK 0000072333


344it [05:37,  1.04it/s]

Scraping CIK 0000702165


345it [05:38,  1.10it/s]

Scraping CIK 0000073124


346it [05:39,  1.08it/s]

Scraping CIK 0001133421


347it [05:39,  1.12it/s]

Scraping CIK 0000849399


348it [05:40,  1.10it/s]

Scraping CIK 0001513761


349it [05:41,  1.08it/s]

Scraping CIK 0001013871


350it [05:42,  1.01it/s]

Scraping CIK 0000073309


351it [05:44,  1.05s/it]

Scraping CIK 0001045810


352it [05:45,  1.02s/it]

Scraping CIK 0000906163


353it [05:46,  1.08s/it]

Scraping CIK 0000898173


354it [05:46,  1.03it/s]

Scraping CIK 0000797468


355it [05:47,  1.02it/s]

Scraping CIK 0000878927


356it [05:48,  1.03it/s]

Scraping CIK 0000029989


357it [05:49,  1.02it/s]

Scraping CIK 0001039684


358it [05:51,  1.05s/it]

Scraping CIK 0001341439


360it [05:52,  1.33it/s]

Scraping CIK 0001781335
Scraping CIK 0000075362


361it [05:53,  1.04s/it]

Scraping CIK 0000075677


362it [05:54,  1.02it/s]

Scraping CIK 0000076334


363it [05:55,  1.05it/s]

Scraping CIK 0000723531


364it [05:56,  1.09it/s]

Scraping CIK 0001590955


365it [05:57,  1.09it/s]

Scraping CIK 0001633917


366it [05:58,  1.03it/s]

Scraping CIK 0000077360


367it [05:59,  1.02s/it]

Scraping CIK 0001378946


368it [06:00,  1.02s/it]

Scraping CIK 0000077476


369it [06:01,  1.00s/it]

Scraping CIK 0000031791


370it [06:02,  1.01s/it]

Scraping CIK 0001585364


371it [06:03,  1.01it/s]

Scraping CIK 0000078003


372it [06:04,  1.03s/it]

Scraping CIK 0001413329


373it [06:05,  1.02s/it]

Scraping CIK 0001534701


374it [06:06,  1.04s/it]

Scraping CIK 0000764622


375it [06:08,  1.21s/it]

Scraping CIK 0001038357


376it [06:09,  1.23s/it]

Scraping CIK 0000713676


377it [06:10,  1.20s/it]

Scraping CIK 0000079879


378it [06:11,  1.13s/it]

Scraping CIK 0000922224


379it [06:12,  1.12s/it]

Scraping CIK 0001126328


380it [06:14,  1.24s/it]

Scraping CIK 0000080424


381it [06:15,  1.16s/it]

Scraping CIK 0000080661


382it [06:16,  1.14s/it]

Scraping CIK 0001045609


383it [06:17,  1.15s/it]

Scraping CIK 0001137774


384it [06:18,  1.17s/it]

Scraping CIK 0000788784


385it [06:19,  1.16s/it]

Scraping CIK 0001393311


386it [06:21,  1.17s/it]

Scraping CIK 0000822416


387it [06:22,  1.16s/it]

Scraping CIK 0000078239


388it [06:23,  1.11s/it]

Scraping CIK 0001604778


389it [06:24,  1.00s/it]

Scraping CIK 0001050915


390it [06:24,  1.02it/s]

Scraping CIK 0000804328


391it [06:26,  1.01s/it]

Scraping CIK 0001022079


392it [06:27,  1.06s/it]

Scraping CIK 0001037038


393it [06:28,  1.00s/it]

Scraping CIK 0000720005


394it [06:29,  1.02it/s]

Scraping CIK 0000101829


395it [06:30,  1.00it/s]

Scraping CIK 0000726728


396it [06:31,  1.03s/it]

Scraping CIK 0000910606


397it [06:32,  1.16s/it]

Scraping CIK 0000872589


398it [06:33,  1.09s/it]

Scraping CIK 0001281761


399it [06:34,  1.14s/it]

Scraping CIK 0001060391


400it [06:35,  1.15s/it]

Scraping CIK 0000943819


401it [06:36,  1.08s/it]

Scraping CIK 0000315213


402it [06:37,  1.04s/it]

Scraping CIK 0001024478


403it [06:38,  1.00s/it]

Scraping CIK 0000084839


404it [06:39,  1.03it/s]

Scraping CIK 0000882835


405it [06:40,  1.00s/it]

Scraping CIK 0000745732


406it [06:41,  1.04it/s]

Scraping CIK 0000884887


407it [06:43,  1.11s/it]

Scraping CIK 0000064040


408it [06:44,  1.18s/it]

Scraping CIK 0001108524


409it [06:45,  1.07s/it]

Scraping CIK 0001034054


410it [06:46,  1.09s/it]

Scraping CIK 0000087347


411it [06:47,  1.06s/it]

Scraping CIK 0001137789


412it [06:48,  1.06s/it]

Scraping CIK 0001012100


413it [06:49,  1.08s/it]

Scraping CIK 0001032208


414it [06:50,  1.10s/it]

Scraping CIK 0001373715


415it [06:51,  1.00s/it]

Scraping CIK 0000089800


416it [06:52,  1.05it/s]

Scraping CIK 0001063761


417it [06:53,  1.04it/s]

Scraping CIK 0000004127


418it [06:54,  1.06it/s]

Scraping CIK 0001040971


419it [06:55,  1.05it/s]

Scraping CIK 0000091440


420it [06:56,  1.03it/s]

Scraping CIK 0000092122


421it [06:57,  1.11s/it]

Scraping CIK 0000092380


422it [06:58,  1.03s/it]

Scraping CIK 0000093556


423it [06:59,  1.02it/s]

Scraping CIK 0000829224


424it [07:00,  1.03it/s]

Scraping CIK 0000093751


425it [07:01,  1.05s/it]

Scraping CIK 0001757898


426it [07:02,  1.01it/s]

Scraping CIK 0000310764


427it [07:03,  1.03it/s]

Scraping CIK 0000719739


428it [07:04,  1.02s/it]

Scraping CIK 0001601712


429it [07:05,  1.02it/s]

Scraping CIK 0000883241


430it [07:06,  1.04it/s]

Scraping CIK 0000096021


431it [07:07,  1.01it/s]

Scraping CIK 0001283699


432it [07:08,  1.07it/s]

Scraping CIK 0001113169


433it [07:09,  1.06it/s]

Scraping CIK 0000946581


434it [07:09,  1.10it/s]

Scraping CIK 0001116132


435it [07:10,  1.12it/s]

Scraping CIK 0000027419


436it [07:11,  1.12it/s]

Scraping CIK 0001385157


437it [07:12,  1.06it/s]

Scraping CIK 0001681459


438it [07:13,  1.11it/s]

Scraping CIK 0000096943


439it [07:14,  1.10it/s]

Scraping CIK 0000097476


440it [07:15,  1.09it/s]

Scraping CIK 0000217346


441it [07:16,  1.08it/s]

Scraping CIK 0000097745


442it [07:17,  1.00it/s]

Scraping CIK 0000098246


443it [07:18,  1.02it/s]

Scraping CIK 0000109198


444it [07:19,  1.05it/s]

Scraping CIK 0000916365


445it [07:20,  1.07it/s]

Scraping CIK 0001466258


446it [07:20,  1.13it/s]

Scraping CIK 0001260221


447it [07:21,  1.09it/s]

Scraping CIK 0000086312


448it [07:23,  1.02it/s]

Scraping CIK 0000092230


449it [07:24,  1.00it/s]

Scraping CIK 0001418091


450it [07:24,  1.08it/s]

Scraping CIK 0000100493


451it [07:25,  1.05it/s]

Scraping CIK 0000074208


452it [07:27,  1.08s/it]

Scraping CIK 0001403568


453it [07:27,  1.02it/s]

Scraping CIK 0000036104


454it [07:28,  1.03it/s]

Scraping CIK 0001336917


455it [07:29,  1.03it/s]

Already scraped CIK 0001336917
Scraping CIK 0000100885


457it [07:30,  1.21it/s]

Scraping CIK 0000100517


458it [07:31,  1.19it/s]

Scraping CIK 0000731766


459it [07:32,  1.13it/s]

Scraping CIK 0001090727


460it [07:33,  1.03it/s]

Scraping CIK 0001067701


461it [07:34,  1.04it/s]

Scraping CIK 0000352915


462it [07:36,  1.11s/it]

Scraping CIK 0000005513


463it [07:37,  1.07s/it]

Scraping CIK 0000103379


464it [07:38,  1.04s/it]

Scraping CIK 0001035002


465it [07:39,  1.01s/it]

Scraping CIK 0000203527


466it [07:40,  1.00s/it]

Scraping CIK 0000740260


467it [07:41,  1.14s/it]

Scraping CIK 0001014473


468it [07:42,  1.04s/it]

Scraping CIK 0001442145


469it [07:43,  1.03it/s]

Scraping CIK 0000732712


470it [07:44,  1.03s/it]

Scraping CIK 0000875320


471it [07:45,  1.03s/it]

Scraping CIK 0000813828


472it [07:46,  1.04s/it]

Scraping CIK 0001403161


473it [07:47,  1.08s/it]

Scraping CIK 0000899689


474it [07:48,  1.11s/it]

Scraping CIK 0001396009


475it [07:49,  1.07s/it]

Scraping CIK 0000011544


476it [07:50,  1.03s/it]

Scraping CIK 0000943452


477it [07:51,  1.04s/it]

Scraping CIK 0000104169


478it [07:52,  1.07s/it]

Scraping CIK 0001618921


479it [07:53,  1.01it/s]

Scraping CIK 0001744489


480it [07:54,  1.06it/s]

Scraping CIK 0000823768


481it [07:55,  1.08it/s]

Scraping CIK 0001000697


482it [07:56,  1.06it/s]

Scraping CIK 0000783325


483it [07:57,  1.05it/s]

Scraping CIK 0000072971


484it [07:58,  1.08it/s]

Scraping CIK 0000766704


485it [07:59,  1.10s/it]

Scraping CIK 0000105770


486it [08:00,  1.06s/it]

Scraping CIK 0000106040


487it [08:01,  1.02s/it]

Scraping CIK 0001365135


488it [08:02,  1.02it/s]

Scraping CIK 0001732845


489it [08:03,  1.00s/it]

Scraping CIK 0000106535


490it [08:04,  1.02s/it]

Scraping CIK 0000106640


491it [08:05,  1.06s/it]

Scraping CIK 0000107263


492it [08:06,  1.06s/it]

Scraping CIK 0001140536


493it [08:08,  1.07s/it]

Scraping CIK 0001174922


494it [08:09,  1.07s/it]

Scraping CIK 0000072903


495it [08:10,  1.04s/it]

Scraping CIK 0001770450


496it [08:10,  1.01it/s]

Scraping CIK 0000743988


497it [08:11,  1.01it/s]

Scraping CIK 0001524472


498it [08:12,  1.07it/s]

Scraping CIK 0001041061


499it [08:13,  1.04it/s]

Scraping CIK 0000877212


500it [08:15,  1.08s/it]

Scraping CIK 0001136869


501it [08:16,  1.09s/it]

Scraping CIK 0000109380


502it [08:17,  1.12s/it]

Scraping CIK 0001555280


503it [08:18,  1.01it/s]


In [623]:
# Run the function to scrape 10-Qs
# Define parameters
browse_url_base_10q = 'https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={}&type=10-Q&count=1000'
filing_url_base_10q = 'http://www.sec.gov/Archives/edgar/data/{}/{}-index.html'
doc_url_base_10q = 'http://www.sec.gov/Archives/edgar/data/{}/{}/{}'

# Set correct directory
os.chdir(pathname_10q)

# Initialize log file
# (log file name = the time we initiate scraping session)
time = strftime("%Y-%m-%d %Hh%Mm%Ss", gmtime())
log_file_name = 'log '+time+'.txt'
with open(log_file_name, 'a') as log_file:
    log_file.close()

# Iterate over CIKs and scrape 10-Ks
for ticker,row in tqdm(ticker_cik_df.iterrows()):
    ScrapeDocument(ticker=ticker,
                   browse_url_base=browse_url_base_10q, 
                   filing_url_base=filing_url_base_10q, 
                   doc_url_base=doc_url_base_10q, 
                   cik=row['cik'],
                   log_file_name=log_file_name,
                   is10K = False)
    
os.chdir('..')

0it [00:00, ?it/s]

Scraping CIK 0000066740


1it [00:01,  1.22s/it]

Scraping CIK 0000001800


2it [00:02,  1.20s/it]

Scraping CIK 0001551152


3it [00:03,  1.27s/it]

Scraping CIK 0000815094


4it [00:04,  1.21s/it]

Scraping CIK 0001467373


5it [00:05,  1.09s/it]

Scraping CIK 0000718877


6it [00:06,  1.13s/it]

Scraping CIK 0000796343


7it [00:07,  1.12s/it]

Scraping CIK 0000002488


8it [00:09,  1.09s/it]

Scraping CIK 0001158449


9it [00:09,  1.04s/it]

Scraping CIK 0000874761


10it [00:10,  1.05s/it]

Scraping CIK 0000004977


11it [00:12,  1.14s/it]

Scraping CIK 0001090872


12it [00:13,  1.11s/it]

Scraping CIK 0000002969


13it [00:14,  1.11s/it]

Scraping CIK 0001086222


14it [00:15,  1.11s/it]

Scraping CIK 0000766421


15it [00:16,  1.11s/it]

Scraping CIK 0000915913


16it [00:17,  1.07s/it]

Scraping CIK 0001035443


17it [00:18,  1.08s/it]

Scraping CIK 0000899866


18it [00:19,  1.08s/it]

Scraping CIK 0001097149


19it [00:20,  1.05s/it]

Scraping CIK 0001579241


20it [00:21,  1.04it/s]

Scraping CIK 0001101215


21it [00:22,  1.03s/it]

Scraping CIK 0000352541


22it [00:23,  1.05s/it]

Scraping CIK 0000899051


23it [00:25,  1.08s/it]

Scraping CIK 0001652044


24it [00:26,  1.04s/it]

Already scraped CIK 0001652044
Scraping CIK 0000764180


26it [00:27,  1.11it/s]

Scraping CIK 0001018724


27it [00:28,  1.08it/s]

Scraping CIK 0001748790


28it [00:29,  1.09it/s]

Scraping CIK 0001002910


29it [00:30,  1.11s/it]

Scraping CIK 0000006201


30it [00:31,  1.13s/it]

Scraping CIK 0000004904


31it [00:33,  1.20s/it]

Scraping CIK 0000004962


32it [00:34,  1.20s/it]

Scraping CIK 0000005272


33it [00:35,  1.31s/it]

Scraping CIK 0001053507


34it [00:36,  1.22s/it]

Scraping CIK 0001410636


35it [00:38,  1.18s/it]

Scraping CIK 0000820027


36it [00:39,  1.13s/it]

Scraping CIK 0001140859


37it [00:39,  1.08s/it]

Scraping CIK 0001037868


38it [00:40,  1.04s/it]

Scraping CIK 0000318154


39it [00:42,  1.06s/it]

Scraping CIK 0000820313


40it [00:43,  1.11s/it]

Scraping CIK 0000006281


41it [00:44,  1.09s/it]

Scraping CIK 0001013462


42it [00:45,  1.08s/it]

Scraping CIK 0001156039


43it [00:46,  1.09s/it]

Scraping CIK 0000315293


44it [00:47,  1.10s/it]

Scraping CIK 0000091142


45it [00:48,  1.07s/it]

Scraping CIK 0000006769


46it [00:50,  1.19s/it]

Scraping CIK 0000922864


47it [00:51,  1.32s/it]

Scraping CIK 0000320193


48it [00:52,  1.25s/it]

Scraping CIK 0000006951


49it [00:54,  1.29s/it]

Scraping CIK 0001521332


50it [00:55,  1.18s/it]

Scraping CIK 0000007084


51it [00:56,  1.13s/it]

Scraping CIK 0001596532


52it [00:57,  1.10s/it]

Scraping CIK 0000354190


53it [00:58,  1.31s/it]

Scraping CIK 0001267238


54it [00:59,  1.21s/it]

Scraping CIK 0000732717


55it [01:01,  1.23s/it]

Scraping CIK 0000731802


56it [01:02,  1.18s/it]

Scraping CIK 0000769397


57it [01:03,  1.16s/it]

Scraping CIK 0000008670


58it [01:04,  1.16s/it]

Scraping CIK 0000866787


59it [01:05,  1.13s/it]

Scraping CIK 0000915912


60it [01:06,  1.07s/it]

Scraping CIK 0000008818


61it [01:07,  1.05s/it]

Scraping CIK 0001701605


62it [01:08,  1.05it/s]

Scraping CIK 0000009389


63it [01:09,  1.02it/s]

Scraping CIK 0000070858


64it [01:10,  1.14s/it]

Scraping CIK 0001390777


65it [01:12,  1.18s/it]

Scraping CIK 0000010456


66it [01:13,  1.21s/it]

Scraping CIK 0000010795


67it [01:14,  1.19s/it]

Scraping CIK 0000764478


68it [01:15,  1.18s/it]

Scraping CIK 0000875045


69it [01:16,  1.15s/it]

Scraping CIK 0001364742


70it [01:18,  1.25s/it]

Scraping CIK 0000012927


71it [01:19,  1.16s/it]

Scraping CIK 0001075531


72it [01:20,  1.29s/it]

Scraping CIK 0000908255


73it [01:21,  1.20s/it]

Scraping CIK 0001037540


74it [01:22,  1.17s/it]

Scraping CIK 0000885725


75it [01:23,  1.15s/it]

Scraping CIK 0000014272


76it [01:25,  1.18s/it]

Scraping CIK 0001730168


77it [01:26,  1.11s/it]

Scraping CIK 0001383312


78it [01:27,  1.11s/it]

Scraping CIK 0001043277


79it [01:28,  1.09s/it]

Scraping CIK 0000858470


80it [01:29,  1.08s/it]

Scraping CIK 0000813672


81it [01:30,  1.06s/it]

Scraping CIK 0000016732


82it [01:31,  1.06s/it]

Scraping CIK 0000927628


83it [01:32,  1.17s/it]

Scraping CIK 0000721371


84it [01:34,  1.16s/it]

Scraping CIK 0001170010


85it [01:34,  1.09s/it]

Scraping CIK 0000815097


86it [01:36,  1.11s/it]

Scraping CIK 0001783180


87it [01:36,  1.01s/it]

Scraping CIK 0000018230


88it [01:37,  1.04s/it]

Scraping CIK 0001374310


89it [01:39,  1.08s/it]

Scraping CIK 0001138118


90it [01:40,  1.07s/it]

Scraping CIK 0001402057


91it [01:41,  1.01s/it]

Scraping CIK 0001306830


92it [01:42,  1.02s/it]

Scraping CIK 0001071739


93it [01:43,  1.00it/s]

Scraping CIK 0001130310


94it [01:44,  1.07s/it]

Scraping CIK 0000018926


95it [01:45,  1.10s/it]

Scraping CIK 0000804753


96it [01:46,  1.12s/it]

Scraping CIK 0001324404


97it [01:47,  1.08s/it]

Scraping CIK 0000316709


98it [01:48,  1.15s/it]

Scraping CIK 0001091667


99it [01:50,  1.17s/it]

Scraping CIK 0000093410


100it [01:51,  1.14s/it]

Scraping CIK 0001058090


101it [01:52,  1.05s/it]

Scraping CIK 0000896159


102it [01:53,  1.09s/it]

Scraping CIK 0000313927


103it [01:54,  1.05s/it]

Scraping CIK 0001739940


104it [01:55,  1.02it/s]

Scraping CIK 0000020286


105it [01:56,  1.00s/it]

Scraping CIK 0000723254


106it [01:57,  1.09s/it]

Scraping CIK 0000858877


107it [01:58,  1.10s/it]

Scraping CIK 0000831001


108it [02:00,  1.27s/it]

Scraping CIK 0000759944


109it [02:01,  1.24s/it]

Scraping CIK 0000877890


110it [02:02,  1.18s/it]

Scraping CIK 0000021076


111it [02:03,  1.12s/it]

Scraping CIK 0001156375


112it [02:04,  1.10s/it]

Scraping CIK 0000811156


113it [02:05,  1.14s/it]

Scraping CIK 0000021344


114it [02:06,  1.16s/it]

Scraping CIK 0001058290


115it [02:07,  1.13s/it]

Scraping CIK 0000021665


116it [02:08,  1.09s/it]

Scraping CIK 0001166691


117it [02:09,  1.01s/it]

Scraping CIK 0000028412


118it [02:10,  1.08s/it]

Scraping CIK 0000023217


119it [02:12,  1.10s/it]

Scraping CIK 0001358071


120it [02:13,  1.04s/it]

Scraping CIK 0001163165


121it [02:14,  1.04s/it]

Scraping CIK 0001047862


122it [02:15,  1.18s/it]

Scraping CIK 0000016918


123it [02:17,  1.32s/it]

Scraping CIK 0000711404


124it [02:19,  1.51s/it]

Scraping CIK 0000900075


125it [02:21,  1.69s/it]

Scraping CIK 0000024741


126it [02:22,  1.65s/it]

Scraping CIK 0001755672


127it [02:23,  1.47s/it]

Scraping CIK 0000909832


128it [02:25,  1.64s/it]

Scraping CIK 0001024305


129it [02:27,  1.57s/it]

Scraping CIK 0001051470


130it [02:28,  1.47s/it]

Scraping CIK 0000277948


131it [02:30,  1.71s/it]

Scraping CIK 0000026172


132it [02:33,  2.07s/it]

Scraping CIK 0000064803


133it [02:35,  1.99s/it]

Scraping CIK 0000882184


134it [02:38,  2.17s/it]

Scraping CIK 0000313616


135it [02:39,  2.03s/it]

Scraping CIK 0000940944


136it [02:40,  1.76s/it]

Scraping CIK 0000927066


137it [02:42,  1.58s/it]

Scraping CIK 0000315189


138it [02:44,  1.79s/it]

Scraping CIK 0000027904


139it [02:46,  1.92s/it]

Scraping CIK 0000818479


140it [02:47,  1.75s/it]

Scraping CIK 0001090012


141it [02:49,  1.61s/it]

Scraping CIK 0001093557


142it [02:50,  1.44s/it]

Scraping CIK 0001539838


143it [02:51,  1.29s/it]

Scraping CIK 0001297996


144it [02:52,  1.29s/it]

Scraping CIK 0001393612


145it [02:54,  1.35s/it]

Scraping CIK 0001437107


146it [02:55,  1.26s/it]

Already scraped CIK 0001437107
Scraping CIK 0001001082


148it [02:56,  1.06s/it]

Scraping CIK 0000029534


149it [02:57,  1.04s/it]

Scraping CIK 0000935703


150it [02:58,  1.23s/it]

Scraping CIK 0000715957


151it [03:00,  1.39s/it]

Scraping CIK 0001286681


152it [03:01,  1.25s/it]

Scraping CIK 0000029905


153it [03:03,  1.47s/it]

Scraping CIK 0001751788


154it [03:05,  1.55s/it]

Scraping CIK 0000936340


155it [03:07,  1.87s/it]

Scraping CIK 0001326160


156it [03:09,  1.77s/it]

Scraping CIK 0000783280


157it [03:11,  1.92s/it]

Scraping CIK 0001666700


158it [03:13,  1.72s/it]

Scraping CIK 0001688568


159it [03:14,  1.57s/it]

Scraping CIK 0001015780


160it [03:16,  1.84s/it]

Scraping CIK 0000915389


161it [03:19,  2.07s/it]

Scraping CIK 0001551182


162it [03:21,  1.96s/it]

Scraping CIK 0001065088


163it [03:22,  1.89s/it]

Scraping CIK 0000031462


164it [03:25,  2.02s/it]

Scraping CIK 0000827052


165it [03:26,  1.80s/it]

Scraping CIK 0001099800


166it [03:27,  1.69s/it]

Scraping CIK 0000712515


167it [03:29,  1.79s/it]

Scraping CIK 0000032604


168it [03:31,  1.74s/it]

Scraping CIK 0000065984


169it [03:34,  1.99s/it]

Scraping CIK 0000821189


170it [03:36,  2.05s/it]

Scraping CIK 0000033185


171it [03:39,  2.35s/it]

Scraping CIK 0001101239


172it [03:41,  2.17s/it]

Scraping CIK 0000906107


173it [03:42,  2.11s/it]

Scraping CIK 0000920522


174it [03:44,  1.97s/it]

Scraping CIK 0001001250


175it [03:46,  1.96s/it]

Scraping CIK 0001711269


176it [03:47,  1.67s/it]

Scraping CIK 0000072741


177it [03:48,  1.55s/it]

Scraping CIK 0001095073


178it [03:49,  1.44s/it]

Scraping CIK 0001109357


179it [03:52,  1.67s/it]

Scraping CIK 0001324424


180it [03:53,  1.46s/it]

Scraping CIK 0000746515


181it [03:54,  1.43s/it]

Scraping CIK 0001289490


182it [03:56,  1.57s/it]

Scraping CIK 0000034088


183it [03:58,  1.59s/it]

Scraping CIK 0001048695


184it [03:59,  1.59s/it]

Scraping CIK 0001326801


185it [04:00,  1.43s/it]

Scraping CIK 0000815556


186it [04:02,  1.49s/it]

Scraping CIK 0000034903


187it [04:03,  1.42s/it]

Scraping CIK 0001048911


188it [04:05,  1.42s/it]

Scraping CIK 0001136893


189it [04:06,  1.43s/it]

Scraping CIK 0000035527


190it [04:08,  1.70s/it]

Scraping CIK 0001031296


191it [04:10,  1.66s/it]

Scraping CIK 0001132979


192it [04:10,  1.24s/it]

Scraping CIK 0000798354


193it [04:12,  1.48s/it]

Scraping CIK 0001175454


194it [04:14,  1.46s/it]

Scraping CIK 0000354908


195it [04:15,  1.53s/it]

Scraping CIK 0000030625


196it [04:16,  1.42s/it]

Scraping CIK 0000037785


197it [04:18,  1.53s/it]

Scraping CIK 0000037996


198it [04:21,  1.80s/it]

Scraping CIK 0001262039


199it [04:22,  1.70s/it]

Scraping CIK 0001659166


200it [04:23,  1.47s/it]

Scraping CIK 0001519751


201it [04:24,  1.45s/it]

Scraping CIK 0001754301


202it [04:25,  1.28s/it]

Already scraped CIK 0001754301
Scraping CIK 0000038777


204it [04:27,  1.07s/it]

Scraping CIK 0000831259


205it [04:28,  1.21s/it]

Scraping CIK 0000039911


206it [04:31,  1.60s/it]

Scraping CIK 0001121788


207it [04:32,  1.59s/it]

Scraping CIK 0000749251


208it [04:34,  1.55s/it]

Scraping CIK 0000040533


209it [04:35,  1.48s/it]

Scraping CIK 0000040545


210it [04:37,  1.53s/it]

Scraping CIK 0000040704


211it [04:38,  1.48s/it]

Scraping CIK 0001467858


212it [04:39,  1.39s/it]

Scraping CIK 0000040987


213it [04:40,  1.29s/it]

Scraping CIK 0000882095


214it [04:42,  1.33s/it]

Scraping CIK 0000320335


215it [04:43,  1.38s/it]

Scraping CIK 0001123360


216it [04:45,  1.49s/it]

Scraping CIK 0000886982


217it [04:48,  1.91s/it]

Scraping CIK 0000277135


218it [04:50,  2.01s/it]

Scraping CIK 0000012659


219it [04:52,  1.97s/it]

Scraping CIK 0000045012


220it [04:53,  1.79s/it]

Scraping CIK 0001359841


221it [04:55,  1.67s/it]

Scraping CIK 0000793952


222it [04:56,  1.63s/it]

Scraping CIK 0000874766


223it [04:58,  1.66s/it]

Scraping CIK 0000046080


224it [04:59,  1.58s/it]

Scraping CIK 0000860730


225it [05:01,  1.50s/it]

Scraping CIK 0000765880


226it [05:03,  1.80s/it]

Scraping CIK 0001000228


227it [05:05,  1.75s/it]

Scraping CIK 0000047111


228it [05:06,  1.72s/it]

Scraping CIK 0000004447


229it [05:08,  1.63s/it]

Scraping CIK 0001645590


230it [05:09,  1.57s/it]

Scraping CIK 0001585689


231it [05:11,  1.57s/it]

Scraping CIK 0000048039


232it [05:15,  2.22s/it]

Scraping CIK 0000859737


233it [05:17,  2.27s/it]

Scraping CIK 0000354950


234it [05:20,  2.44s/it]

Scraping CIK 0000773840


235it [05:21,  2.12s/it]

Scraping CIK 0000048465


236it [05:23,  1.97s/it]

Scraping CIK 0001070750


237it [05:25,  2.02s/it]

Scraping CIK 0000004281


238it [05:26,  1.86s/it]

Scraping CIK 0000047217


239it [05:28,  1.84s/it]

Scraping CIK 0000049071


240it [05:31,  2.11s/it]

Scraping CIK 0000049196


241it [05:33,  2.21s/it]

Scraping CIK 0001501585


242it [05:35,  1.95s/it]

Scraping CIK 0000832101


243it [05:36,  1.87s/it]

Scraping CIK 0000874716


244it [05:39,  1.97s/it]

Scraping CIK 0001598014


245it [05:39,  1.66s/it]

Scraping CIK 0000049826


246it [05:41,  1.56s/it]

Scraping CIK 0001110803


247it [05:42,  1.48s/it]

Scraping CIK 0000879169


248it [05:44,  1.62s/it]

Scraping CIK 0001699150


249it [05:45,  1.45s/it]

Scraping CIK 0000050863


250it [05:46,  1.42s/it]

Scraping CIK 0001571949


251it [05:48,  1.32s/it]

Scraping CIK 0000051143


252it [05:49,  1.41s/it]

Scraping CIK 0000051434


253it [05:50,  1.37s/it]

Scraping CIK 0000051644


254it [05:52,  1.33s/it]

Scraping CIK 0000051253


255it [05:53,  1.32s/it]

Scraping CIK 0000896878


256it [05:54,  1.30s/it]

Scraping CIK 0001035267


257it [05:55,  1.26s/it]

Scraping CIK 0000914208


258it [05:57,  1.45s/it]

Scraping CIK 0001111928


259it [05:59,  1.38s/it]

Scraping CIK 0001478242


260it [05:59,  1.25s/it]

Scraping CIK 0001020569


261it [06:01,  1.35s/it]

Scraping CIK 0000779152


262it [06:02,  1.34s/it]

Scraping CIK 0000052988


263it [06:04,  1.47s/it]

Scraping CIK 0000728535


264it [06:05,  1.38s/it]

Scraping CIK 0000091419


265it [06:06,  1.28s/it]

Scraping CIK 0000200406


266it [06:08,  1.35s/it]

Scraping CIK 0000833444


267it [06:10,  1.64s/it]

Scraping CIK 0000019617


268it [06:13,  1.94s/it]

Scraping CIK 0001043604


269it [06:14,  1.77s/it]

Scraping CIK 0000054480


270it [06:15,  1.58s/it]

Scraping CIK 0000055067


271it [06:17,  1.47s/it]

Scraping CIK 0000091576


272it [06:19,  1.65s/it]

Scraping CIK 0001601046


273it [06:19,  1.40s/it]

Scraping CIK 0000055785


274it [06:21,  1.35s/it]

Scraping CIK 0000879101


275it [06:22,  1.38s/it]

Scraping CIK 0001506307


276it [06:23,  1.34s/it]

Scraping CIK 0000319201


277it [06:25,  1.32s/it]

Scraping CIK 0000885639


278it [06:26,  1.27s/it]

Scraping CIK 0001637459


279it [06:27,  1.19s/it]

Scraping CIK 0000056873


280it [06:28,  1.18s/it]

Scraping CIK 0000701985


281it [06:30,  1.30s/it]

Scraping CIK 0000202058


282it [06:31,  1.30s/it]

Scraping CIK 0000920148


283it [06:32,  1.31s/it]

Scraping CIK 0000707549


284it [06:33,  1.24s/it]

Scraping CIK 0001679273


285it [06:34,  1.12s/it]

Scraping CIK 0001300514


286it [06:35,  1.07s/it]

Scraping CIK 0000058492


287it [06:36,  1.15s/it]

Scraping CIK 0001336920


288it [06:37,  1.12s/it]

Scraping CIK 0000920760


289it [06:38,  1.10s/it]

Scraping CIK 0000059478


290it [06:40,  1.15s/it]

Scraping CIK 0000059558


291it [06:42,  1.43s/it]

Scraping CIK 0001707925


292it [06:43,  1.22s/it]

Scraping CIK 0001335258


293it [06:43,  1.14s/it]

Scraping CIK 0001065696


294it [06:45,  1.15s/it]

Scraping CIK 0000936468


295it [06:46,  1.19s/it]

Scraping CIK 0000060086


296it [06:47,  1.21s/it]

Scraping CIK 0000060667


297it [06:48,  1.20s/it]

Scraping CIK 0001489393


298it [06:49,  1.14s/it]

Scraping CIK 0000036270


299it [06:51,  1.21s/it]

Scraping CIK 0000101778


300it [06:52,  1.21s/it]

Scraping CIK 0001510295


301it [06:53,  1.12s/it]

Scraping CIK 0001278021


302it [06:54,  1.07s/it]

Scraping CIK 0001048286


303it [06:55,  1.08s/it]

Scraping CIK 0000062709


304it [06:56,  1.17s/it]

Scraping CIK 0000916076


305it [06:57,  1.15s/it]

Scraping CIK 0000062996


306it [06:58,  1.12s/it]

Scraping CIK 0001141391


307it [07:00,  1.13s/it]

Scraping CIK 0000063754


308it [07:01,  1.15s/it]

Scraping CIK 0000743316


309it [07:02,  1.12s/it]

Scraping CIK 0000063908


310it [07:03,  1.18s/it]

Scraping CIK 0000927653


311it [07:05,  1.24s/it]

Scraping CIK 0001613103


312it [07:06,  1.24s/it]

Scraping CIK 0000310158


313it [07:08,  1.55s/it]

Scraping CIK 0001099219


314it [07:10,  1.53s/it]

Scraping CIK 0001037646


315it [07:11,  1.43s/it]

Scraping CIK 0000789570


316it [07:12,  1.44s/it]

Scraping CIK 0000827054


317it [07:14,  1.54s/it]

Scraping CIK 0000723125


318it [07:16,  1.81s/it]

Scraping CIK 0000789019


319it [07:18,  1.82s/it]

Scraping CIK 0000912595


320it [07:20,  1.74s/it]

Scraping CIK 0000851968


321it [07:21,  1.60s/it]

Scraping CIK 0000024545


322it [07:24,  1.86s/it]

Scraping CIK 0001103982


323it [07:25,  1.74s/it]

Scraping CIK 0000865752


324it [07:26,  1.57s/it]

Scraping CIK 0001059556


325it [07:28,  1.57s/it]

Scraping CIK 0000895421


326it [07:30,  1.69s/it]

Scraping CIK 0001285785


327it [07:31,  1.48s/it]

Scraping CIK 0000068505


328it [07:33,  1.61s/it]

Scraping CIK 0001408198


329it [07:34,  1.48s/it]

Scraping CIK 0001623613


330it [07:35,  1.33s/it]

Scraping CIK 0001120193


331it [07:37,  1.45s/it]

Scraping CIK 0001021860


332it [07:38,  1.43s/it]

Scraping CIK 0001002047


333it [07:40,  1.57s/it]

Scraping CIK 0001065280


334it [07:42,  1.85s/it]

Scraping CIK 0000814453


335it [07:44,  1.84s/it]

Scraping CIK 0001164727


336it [07:47,  2.08s/it]

Scraping CIK 0001564708


337it [07:48,  1.83s/it]

Already scraped CIK 0001564708
Scraping CIK 0000753308


339it [07:50,  1.50s/it]

Scraping CIK 0001492633


340it [07:51,  1.45s/it]

Scraping CIK 0000320187


341it [07:53,  1.73s/it]

Scraping CIK 0001111711


342it [07:55,  1.75s/it]

Scraping CIK 0000072207


343it [07:56,  1.62s/it]

Scraping CIK 0000072333


344it [07:57,  1.47s/it]

Scraping CIK 0000702165


345it [07:59,  1.35s/it]

Scraping CIK 0000073124


346it [08:00,  1.31s/it]

Scraping CIK 0001133421


347it [08:01,  1.22s/it]

Scraping CIK 0000849399


348it [08:02,  1.26s/it]

Scraping CIK 0001513761


349it [08:03,  1.17s/it]

Scraping CIK 0001013871


350it [08:04,  1.25s/it]

Scraping CIK 0000073309


351it [08:06,  1.26s/it]

Scraping CIK 0001045810


352it [08:07,  1.27s/it]

Scraping CIK 0000906163


353it [08:08,  1.26s/it]

Scraping CIK 0000898173


354it [08:10,  1.27s/it]

Scraping CIK 0000797468


355it [08:11,  1.26s/it]

Scraping CIK 0000878927


356it [08:12,  1.19s/it]

Scraping CIK 0000029989


357it [08:13,  1.18s/it]

Scraping CIK 0001039684


358it [08:15,  1.36s/it]

Scraping CIK 0001341439


359it [08:16,  1.38s/it]

Scraping CIK 0001781335


360it [08:17,  1.30s/it]

Scraping CIK 0000075362


361it [08:19,  1.31s/it]

Scraping CIK 0000075677


362it [08:20,  1.24s/it]

Scraping CIK 0000076334


363it [08:24,  2.09s/it]

Scraping CIK 0000723531


364it [08:26,  1.96s/it]

Scraping CIK 0001590955


365it [08:27,  1.80s/it]

Scraping CIK 0001633917


366it [08:29,  1.83s/it]

Scraping CIK 0000077360


367it [08:30,  1.77s/it]

Scraping CIK 0001378946


368it [08:32,  1.72s/it]

Scraping CIK 0000077476


369it [08:35,  2.00s/it]

Scraping CIK 0000031791


370it [08:36,  1.80s/it]

Scraping CIK 0001585364


371it [08:37,  1.61s/it]

Scraping CIK 0000078003


372it [08:39,  1.67s/it]

Scraping CIK 0001413329


373it [08:40,  1.53s/it]

Scraping CIK 0001534701


374it [08:41,  1.40s/it]

Scraping CIK 0000764622


375it [08:43,  1.48s/it]

Scraping CIK 0001038357


376it [08:44,  1.37s/it]

Scraping CIK 0000713676


377it [08:47,  1.70s/it]

Scraping CIK 0000079879


378it [08:49,  1.81s/it]

Scraping CIK 0000922224


379it [08:52,  2.14s/it]

Scraping CIK 0001126328


380it [08:54,  2.10s/it]

Scraping CIK 0000080424


381it [08:55,  1.91s/it]

Scraping CIK 0000080661


382it [08:57,  1.88s/it]

Scraping CIK 0001045609


383it [08:59,  1.98s/it]

Scraping CIK 0001137774


384it [09:02,  2.15s/it]

Scraping CIK 0000788784


385it [09:04,  2.17s/it]

Scraping CIK 0001393311


386it [09:06,  2.15s/it]

Scraping CIK 0000822416


387it [09:07,  1.90s/it]

Scraping CIK 0000078239


388it [09:08,  1.71s/it]

Scraping CIK 0001604778


389it [09:09,  1.47s/it]

Scraping CIK 0001050915


390it [09:11,  1.39s/it]

Scraping CIK 0000804328


391it [09:12,  1.29s/it]

Scraping CIK 0001022079


392it [09:13,  1.25s/it]

Scraping CIK 0001037038


393it [09:14,  1.36s/it]

Scraping CIK 0000720005


394it [09:16,  1.48s/it]

Scraping CIK 0000101829


395it [09:18,  1.63s/it]

Scraping CIK 0000726728


396it [09:20,  1.57s/it]

Scraping CIK 0000910606


397it [09:21,  1.62s/it]

Scraping CIK 0000872589


398it [09:23,  1.52s/it]

Scraping CIK 0001281761


399it [09:24,  1.61s/it]

Scraping CIK 0001060391


400it [09:26,  1.71s/it]

Scraping CIK 0000943819


401it [09:28,  1.66s/it]

Scraping CIK 0000315213


402it [09:29,  1.51s/it]

Scraping CIK 0001024478


403it [09:30,  1.40s/it]

Scraping CIK 0000084839


404it [09:31,  1.33s/it]

Scraping CIK 0000882835


405it [09:33,  1.35s/it]

Scraping CIK 0000745732


406it [09:35,  1.47s/it]

Scraping CIK 0000884887


407it [09:36,  1.43s/it]

Scraping CIK 0000064040


408it [09:37,  1.36s/it]

Scraping CIK 0001108524


409it [09:38,  1.28s/it]

Scraping CIK 0001034054


410it [09:40,  1.42s/it]

Scraping CIK 0000087347


411it [09:41,  1.37s/it]

Scraping CIK 0001137789


412it [09:42,  1.34s/it]

Scraping CIK 0001012100


413it [09:45,  1.56s/it]

Scraping CIK 0001032208


414it [09:47,  1.79s/it]

Scraping CIK 0001373715


415it [09:48,  1.59s/it]

Scraping CIK 0000089800


416it [09:50,  1.60s/it]

Scraping CIK 0001063761


417it [09:52,  1.86s/it]

Scraping CIK 0000004127


418it [09:54,  1.98s/it]

Scraping CIK 0001040971


419it [09:56,  1.95s/it]

Scraping CIK 0000091440


420it [09:58,  1.81s/it]

Scraping CIK 0000092122


421it [09:59,  1.68s/it]

Scraping CIK 0000092380


422it [10:01,  1.80s/it]

Scraping CIK 0000093556


423it [10:02,  1.64s/it]

Scraping CIK 0000829224


424it [10:04,  1.56s/it]

Scraping CIK 0000093751


425it [10:05,  1.59s/it]

Scraping CIK 0001757898


426it [10:06,  1.38s/it]

Scraping CIK 0000310764


427it [10:08,  1.43s/it]

Scraping CIK 0000719739


428it [10:11,  1.85s/it]

Scraping CIK 0001601712


429it [10:13,  1.84s/it]

Scraping CIK 0000883241


430it [10:15,  2.16s/it]

Scraping CIK 0000096021


431it [10:18,  2.28s/it]

Scraping CIK 0001283699


432it [10:20,  2.06s/it]

Scraping CIK 0001113169


433it [10:21,  1.79s/it]

Scraping CIK 0000946581


434it [10:22,  1.69s/it]

Scraping CIK 0001116132


435it [10:23,  1.50s/it]

Scraping CIK 0000027419


436it [10:25,  1.49s/it]

Scraping CIK 0001385157


437it [10:26,  1.43s/it]

Scraping CIK 0001681459


438it [10:27,  1.26s/it]

Scraping CIK 0000096943


439it [10:28,  1.32s/it]

Scraping CIK 0000097476


440it [10:30,  1.39s/it]

Scraping CIK 0000217346


441it [10:32,  1.47s/it]

Scraping CIK 0000097745


442it [10:34,  1.65s/it]

Scraping CIK 0000098246


443it [10:36,  1.99s/it]

Scraping CIK 0000109198


444it [10:38,  1.99s/it]

Scraping CIK 0000916365


445it [10:40,  1.79s/it]

Scraping CIK 0001466258


446it [10:41,  1.53s/it]

Scraping CIK 0001260221


447it [10:42,  1.42s/it]

Scraping CIK 0000086312


448it [10:43,  1.51s/it]

Scraping CIK 0000092230


449it [10:47,  1.97s/it]

Scraping CIK 0001418091


450it [10:48,  1.96s/it]

Scraping CIK 0000100493


451it [10:51,  2.08s/it]

Scraping CIK 0000074208


452it [10:54,  2.35s/it]

Scraping CIK 0001403568


453it [10:55,  2.00s/it]

Scraping CIK 0000036104


454it [10:58,  2.35s/it]

Scraping CIK 0001336917


455it [11:00,  2.06s/it]

Already scraped CIK 0001336917
Scraping CIK 0000100885


457it [11:02,  1.81s/it]

Scraping CIK 0000100517


458it [11:04,  1.81s/it]

Scraping CIK 0000731766


459it [11:05,  1.67s/it]

Scraping CIK 0001090727


460it [11:06,  1.51s/it]

Scraping CIK 0001067701


461it [11:07,  1.41s/it]

Scraping CIK 0000352915


462it [11:09,  1.35s/it]

Scraping CIK 0000005513


463it [11:10,  1.34s/it]

Scraping CIK 0000103379


464it [11:11,  1.30s/it]

Scraping CIK 0001035002


465it [11:12,  1.29s/it]

Scraping CIK 0000203527


466it [11:14,  1.33s/it]

Scraping CIK 0000740260


467it [11:15,  1.33s/it]

Scraping CIK 0001014473


468it [11:16,  1.28s/it]

Scraping CIK 0001442145


469it [11:17,  1.21s/it]

Scraping CIK 0000732712


470it [11:19,  1.38s/it]

Scraping CIK 0000875320


471it [11:21,  1.47s/it]

Scraping CIK 0000813828


472it [11:22,  1.44s/it]

Scraping CIK 0001403161


473it [11:24,  1.43s/it]

Scraping CIK 0000899689


474it [11:25,  1.49s/it]

Scraping CIK 0001396009


475it [11:27,  1.44s/it]

Scraping CIK 0000011544


476it [11:28,  1.47s/it]

Scraping CIK 0000943452


477it [11:30,  1.71s/it]

Scraping CIK 0000104169


478it [11:32,  1.83s/it]

Scraping CIK 0001618921


479it [11:34,  1.60s/it]

Scraping CIK 0001744489


480it [11:35,  1.43s/it]

Scraping CIK 0000823768


481it [11:37,  1.60s/it]

Scraping CIK 0001000697


482it [11:39,  1.72s/it]

Scraping CIK 0000783325


483it [11:40,  1.72s/it]

Scraping CIK 0000072971


484it [11:43,  2.15s/it]

Scraping CIK 0000766704


485it [11:46,  2.18s/it]

Scraping CIK 0000105770


486it [11:47,  1.95s/it]

Scraping CIK 0000106040


487it [11:49,  1.94s/it]

Scraping CIK 0001365135


488it [11:50,  1.70s/it]

Scraping CIK 0001732845


489it [11:51,  1.50s/it]

Scraping CIK 0000106535


490it [11:53,  1.51s/it]

Scraping CIK 0000106640


491it [11:55,  1.74s/it]

Scraping CIK 0000107263


492it [11:57,  1.72s/it]

Scraping CIK 0001140536


493it [11:58,  1.62s/it]

Scraping CIK 0001174922


494it [12:00,  1.83s/it]

Scraping CIK 0000072903


495it [12:02,  1.84s/it]

Scraping CIK 0001770450


496it [12:04,  1.81s/it]

Scraping CIK 0000743988


497it [12:05,  1.69s/it]

Scraping CIK 0001524472


498it [12:07,  1.61s/it]

Scraping CIK 0001041061


499it [12:08,  1.59s/it]

Scraping CIK 0000877212


500it [12:10,  1.57s/it]

Scraping CIK 0001136869


501it [12:11,  1.47s/it]

Scraping CIK 0000109380


503it [12:13,  1.46s/it]

Already scraped CIK 0001555280





In [624]:
ticker_cik_df

Unnamed: 0_level_0,cik,10-K Filing Date,10-K Period,10-Q Filing Date,10-Q Period
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
mmm,0000066740,2020-02-13,2019-12-31,2020-05-06,2020-03-31
abt,0000001800,2020-02-13,2019-12-31,2020-05-06,2020-03-31
abbv,0001551152,2020-02-13,2019-12-31,2020-05-06,2020-03-31
abmd,0000815094,2020-02-13,2019-12-31,2020-05-06,2020-03-31
acn,0001467373,2020-02-13,2019-12-31,2020-05-06,2020-03-31
atvi,0000718877,2020-02-13,2019-12-31,2020-05-06,2020-03-31
adbe,0000796343,2020-02-13,2019-12-31,2020-05-06,2020-03-31
amd,0000002488,2020-02-13,2019-12-31,2020-05-06,2020-03-31
aap,0001158449,2020-02-13,2019-12-31,2020-05-06,2020-03-31
aes,0000874761,2020-02-13,2019-12-31,2020-05-06,2020-03-31


# Scrape text

In [417]:
alphabets= "([A-Za-z])"
prefixes = "(Mr|St|Mrs|Ms|Dr)[.]"
suffixes = "(Inc|Ltd|Jr|Sr|Co)"
starters = "(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = "[.](com|net|org|io|gov)"

def split_into_sentences(text):
    text = " " + text + "  "
    text = text.replace("\n"," ")
    text = re.sub(prefixes,"\\1<prd>",text)
    text = re.sub(websites,"<prd>\\1",text)
    if "Ph.D" in text: text = text.replace("Ph.D.","Ph<prd>D<prd>")
    text = re.sub("\s" + alphabets + "[.] "," \\1<prd> ",text)
    text = re.sub(acronyms+" "+starters,"\\1<stop> \\2",text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>\\3<prd>",text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>",text)
    text = re.sub(" "+suffixes+"[.] "+starters," \\1<stop> \\2",text)
    text = re.sub(" "+suffixes+"[.]"," \\1<prd>",text)
    text = re.sub(" " + alphabets + "[.]"," \\1<prd>",text)
    if "”" in text: text = text.replace(".”","”.")
    if "\"" in text: text = text.replace(".\"","\".")
    if "!" in text: text = text.replace("!\"","\"!")
    if "?" in text: text = text.replace("?\"","\"?")
    text = text.replace(".",".<stop>")
    text = text.replace("?","?<stop>")
    text = text.replace("!","!<stop>")
    text = text.replace("<prd>",".")
    sentences = text.split("<stop>")
    sentences = sentences[:-1]
    sentences = [s.strip() for s in sentences]
    return sentences

In [418]:
def between(cur, end):
    while cur and cur != end:
        if isinstance(cur, NavigableString):
            text = cur.strip()
            if len(text):
                yield text
        cur = cur.next_element

def get_risk_factor_text(ticker, is10K):
    if is10K:
        os.chdir(pathname_10k)
    else:
        os.chdir(pathname_10q)
    cik = cik_dict[ticker]
    os.chdir(cik)
    file_name = os.listdir(".")[0]

    with open(file_name) as file:
        soup = bs.BeautifulSoup(file, "html.parser")
    spans = soup.find_all('span')

    risk_factor_span = None
    after_risk_factor_span = None
    
    for span in spans:
        text = span.get_text()
        pattern1A = re.compile("item 1a(.*)")
        if pattern1A.match(text.lower()):
            risk_factor_span = span
        pattern2 = re.compile("item 2(.*)")
        if pattern2.match(text.lower()):
            after_risk_factor_span = span
            
    if not risk_factor_span:
        return []
            
    risk_factor_texts = [text for text in between(risk_factor_span, staff_comment_span)]

    full_text = ' '.join(risk_factor_texts)
    sentences = split_into_sentences(full_text)
    os.chdir('../..')
    return sentences
    


In [419]:
os.chdir(original_directory)
get_risk_factor_text('aapl',False)

{'sentences': ['Item 1A.',
  'Risk Factors The business, financial condition and operating results of the Company can be affected by a number of factors, whether currently known or unknown, including but not limited to those described in Part I, Item 1A of the 2019 Form 10-K under the heading “Risk Factors,” any one or more of which could, directly or indirectly, cause the Company’s actual financial condition and operating results to vary materially from past, or from anticipated future, financial condition and operating results.',
  'Any of these factors, in whole or in part, could materially and adversely affect the Company’s business, financial condition, operating results and stock price.',
  'Except as set forth below, there have been no material changes to the Company’s risk factors since the 2019 Form 10-K.',
  'The Company’s business, results of operations, financial condition and stock price have been adversely affected and could in the future be materially adversely affected 

In [420]:


ticker_dict = {}
for ticker,cik in cik_dict.items():
    dict_10K = get_risk_factor_text(ticker, is10K = True)
    dict_10Q = get_risk_factor_text(ticker, is10K = False)
    
    dict_to_use = dict_10Q
    sentences = dict_to_use['sentences']
    sentence_scores = {sentence: analyzer.polarity_scores(sentence)['compound'] for sentence in sentences}
    ticker_dict[ticker] = {
        'cik': cik,
        'sentence_scores': sentence_scores
    }
    print()

#print(ticker_dict)
    
        
    

aapl
21
-0.10584761904761908

dal
57
-0.017238596491228084

nvda
75
-0.1312148648648649

mar


UnboundLocalError: local variable 'risk_factor_span' referenced before assignment

In [641]:
analyzer = SentimentIntensityAnalyzer()

negative_score = -4.0
positive_score = 4.0
uncertain_score = -2.0

negative_words = pd.read_csv('LoughranMcDonald_SentimentWordLists_Negative.csv', header=None).iloc[:,0]
positive_words = pd.read_csv('LoughranMcDonald_SentimentWordLists_Positive.csv', header=None).iloc[:,0]
uncertain_words = pd.read_csv('LoughranMcDonald_SentimentWordLists_Uncertain.csv', header=None).iloc[:,0]

negative_word_scores = {word: negative_score for word in negative_words}
positive_word_scores = {word: positive_score for word in positive_words}
uncertain_word_scores = {word: uncertain_score for word in uncertain_words}

financial_word_dict = {**negative_word_scores, **positive_word_scores, **uncertain_word_scores}
analyzer.lexicon.update(financial_word_dict)

In [642]:
analyzer.lexicon

{'$:': -1.5,
 '%)': -0.4,
 '%-)': -1.5,
 '&-:': -0.4,
 '&:': -0.7,
 "( '}{' )": 1.6,
 '(%': -0.9,
 "('-:": 2.2,
 "(':": 2.3,
 '((-:': 2.1,
 '(*': 1.1,
 '(-%': -0.7,
 '(-*': 1.3,
 '(-:': 1.6,
 '(-:0': 2.8,
 '(-:<': -0.4,
 '(-:o': 1.5,
 '(-:O': 1.5,
 '(-:{': -0.1,
 '(-:|>*': 1.9,
 '(-;': 1.3,
 '(-;|': 2.1,
 '(8': 2.6,
 '(:': 2.2,
 '(:0': 2.4,
 '(:<': -0.2,
 '(:o': 2.5,
 '(:O': 2.5,
 '(;': 1.1,
 '(;<': 0.3,
 '(=': 2.2,
 '(?:': 2.1,
 '(^:': 1.5,
 '(^;': 1.5,
 '(^;0': 2.0,
 '(^;o': 1.9,
 '(o:': 1.6,
 ")':": -2.0,
 ")-':": -2.1,
 ')-:': -2.1,
 ')-:<': -2.2,
 ')-:{': -2.1,
 '):': -1.8,
 '):<': -1.9,
 '):{': -2.3,
 ');<': -2.6,
 '*)': 0.6,
 '*-)': 0.3,
 '*-:': 2.1,
 '*-;': 2.4,
 '*:': 1.9,
 '*<|:-)': 1.6,
 '*\\0/*': 2.3,
 '*^:': 1.6,
 ',-:': 1.2,
 "---'-;-{@": 2.3,
 '--<--<@': 2.2,
 '.-:': -1.2,
 '..###-:': -1.7,
 '..###:': -1.9,
 '/-:': -1.3,
 '/:': -1.3,
 '/:<': -1.4,
 '/=': -0.9,
 '/^:': -1.0,
 '/o:': -1.4,
 '0-8': 0.1,
 '0-|': -1.2,
 '0:)': 1.9,
 '0:-)': 1.4,
 '0:-3': 1.5,
 '0:03': 1.9,
 '