## Scraping Financial Data from the SEC

There's a ton of public financial data, but it can be a pain to access: most people are not going to click through SEC filings manually.

In [50]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
import json

## Tickers get added all the time; the SEC provides this resource to map tickers to CIK numbers.
## Honestly, I couldn't find a JSON version, so for the sake of lookup runtime we map all public companies here and save the result as JSON.

def update_ticker_to_CIK():
    """Download the SEC ticker -> CIK table, save it as JSON, and return it.

    Fetches ``https://www.sec.gov/include/ticker.txt`` and parses it as one
    tab-separated ``ticker<TAB>CIK`` pair per line. The resulting mapping is
    written to ``financial_data/CIK_map.json`` (the directory is created if
    it does not exist yet).

    Returns:
        dict: ticker string -> CIK string, exactly as they appear in the
        downloaded file (surrounding whitespace removed).

    Raises:
        requests.HTTPError: if the SEC responds with an error status.
    """
    import os  # local import: only needed to make sure the output directory exists

    url = r"https://www.sec.gov/include/ticker.txt"
    r = requests.get(url)
    # Fail loudly instead of trying to parse an HTML error page as ticker data.
    # NOTE(review): the SEC may reject requests that do not send a descriptive
    # User-Agent header -- confirm against the current EDGAR access guidelines.
    r.raise_for_status()

    CIK_map = {}
    for line in r.text.splitlines():
        fields = line.split('\t')
        # Skip blank or malformed lines (e.g. the empty string left by a
        # trailing newline), which previously made dict() raise ValueError.
        if len(fields) != 2:
            continue
        ticker, cik = fields
        CIK_map[ticker.strip()] = cik.strip()

    out_path = 'financial_data/CIK_map.json'
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(CIK_map, f, ensure_ascii=False, indent=4)

    return CIK_map
        
## Build the ticker -> CIK lookup once when this cell runs; search_links_ticker reads this module-level CIK_map.
CIK_map = update_ticker_to_CIK()

In [81]:
def search_links_ticker(tick, max_filings=50):
    """Print a ticker's EDGAR filing folders and the archive links in each.

    Looks the ticker up in the module-level ``CIK_map``, fetches the
    ``/Archives/edgar/data/<CIK>`` directory listing, and visits the first
    ``max_filings`` folder links found there. For every visited folder it
    prints the folder URL followed by each link on that page whose href
    contains ``/Archives/``.

    Args:
        tick: Ticker symbol. It is tried exactly as given first, then
            stripped and lower-cased, so ``'TSLA'`` and ``'tsla'`` both work
            when the map stores lower-case tickers.
        max_filings: Maximum number of folder links to visit. Defaults to 50,
            the limit that used to be hard-coded. ``None`` visits all of them.

    Returns:
        None. All results are printed.

    Raises:
        KeyError: if the ticker is not present in ``CIK_map``.
        requests.HTTPError: if the company directory listing cannot be fetched.
    """
    key = tick if tick in CIK_map else str(tick).strip().lower()
    if key not in CIK_map:
        raise KeyError(f"Ticker {tick!r} not found in CIK_map")
    # Strip stray whitespace so a '\r' left over from the source file cannot
    # end up inside the URL.
    CIK = str(CIK_map[key]).strip()

    # define the base url needed to create the file url.
    base_url = r"https://www.sec.gov"

    # directory listing of every filing folder for this company
    listing_url = f"https://www.sec.gov/Archives/edgar/data/{CIK}"
    content = requests.get(listing_url)
    # Without the listing there is nothing to do, so surface HTTP errors here
    # rather than silently printing nothing.
    content.raise_for_status()
    soup = BeautifulSoup(content.text, 'html.parser')

    filings = [
        base_url + link['href']
        for link in soup.find_all('a', href=True)
        if "/Archives/edgar/data/" in link['href']
    ]

    for folder_url in filings[:max_filings]:
        print(f"Link to Folder: {folder_url}")
        content = requests.get(folder_url)
        # Best effort per folder: report a failed fetch and keep going instead
        # of parsing an error page or aborting the whole run.
        if not content.ok:
            print(f"Skipped (HTTP {content.status_code})")
            continue
        folder_soup = BeautifulSoup(content.text, 'html.parser')
        for file in folder_soup.find_all('a', href=True):
            ext = file['href']
            if "/Archives/" in ext:
                print(base_url + ext)

In [82]:
## Example: print the filing folders (up to 50) for ticker 'tsla' and the /Archives/ links found in each.
search_links_ticker('tsla')

Link to Folder: https://www.sec.gov/Archives/edgar/data/1318605/000179056520000016
https://www.sec.gov/Archives/edgar/data/1318605
https://www.sec.gov/Archives/edgar/data/1318605/000179056520000016/0001790565-20-000016-index-headers.html
https://www.sec.gov/Archives/edgar/data/1318605/000179056520000016/0001790565-20-000016-index.html
https://www.sec.gov/Archives/edgar/data/1318605/000179056520000016/0001790565-20-000016.txt
https://www.sec.gov/Archives/edgar/data/1318605/000179056520000016/edgardoc.xml
Link to Folder: https://www.sec.gov/Archives/edgar/data/1318605/000177134020000010
https://www.sec.gov/Archives/edgar/data/1318605
https://www.sec.gov/Archives/edgar/data/1318605/000177134020000010/0001771340-20-000010-index-headers.html
https://www.sec.gov/Archives/edgar/data/1318605/000177134020000010/0001771340-20-000010-index.html
https://www.sec.gov/Archives/edgar/data/1318605/000177134020000010/0001771340-20-000010.txt
https://www.sec.gov/Archives/edgar/data/1318605/00017713402000

https://www.sec.gov/Archives/edgar/data/1318605
https://www.sec.gov/Archives/edgar/data/1318605/000177134020000008/0001771340-20-000008-index-headers.html
https://www.sec.gov/Archives/edgar/data/1318605/000177134020000008/0001771340-20-000008-index.html
https://www.sec.gov/Archives/edgar/data/1318605/000177134020000008/0001771340-20-000008.txt
https://www.sec.gov/Archives/edgar/data/1318605/000177134020000008/edgardoc.xml
Link to Folder: https://www.sec.gov/Archives/edgar/data/1318605/000179056520000014
https://www.sec.gov/Archives/edgar/data/1318605
https://www.sec.gov/Archives/edgar/data/1318605/000179056520000014/0001790565-20-000014-index-headers.html
https://www.sec.gov/Archives/edgar/data/1318605/000179056520000014/0001790565-20-000014-index.html
https://www.sec.gov/Archives/edgar/data/1318605/000179056520000014/0001790565-20-000014.txt
https://www.sec.gov/Archives/edgar/data/1318605/000179056520000014/edgardoc.xml
Link to Folder: https://www.sec.gov/Archives/edgar/data/1318605/0

https://www.sec.gov/Archives/edgar/data/1318605
https://www.sec.gov/Archives/edgar/data/1318605/000177136420000015/0001771364-20-000015-index-headers.html
https://www.sec.gov/Archives/edgar/data/1318605/000177136420000015/0001771364-20-000015-index.html
https://www.sec.gov/Archives/edgar/data/1318605/000177136420000015/0001771364-20-000015.txt
https://www.sec.gov/Archives/edgar/data/1318605/000177136420000015/edgardoc.xml
Link to Folder: https://www.sec.gov/Archives/edgar/data/1318605/000156459020040042
https://www.sec.gov/Archives/edgar/data/1318605
https://www.sec.gov/Archives/edgar/data/1318605/000156459020040042/0001564590-20-040042-index-headers.html
https://www.sec.gov/Archives/edgar/data/1318605/000156459020040042/0001564590-20-040042-index.html
https://www.sec.gov/Archives/edgar/data/1318605/000156459020040042/0001564590-20-040042.txt
https://www.sec.gov/Archives/edgar/data/1318605/000156459020040042/g4bbvf13zhrn000001.jpg
https://www.sec.gov/Archives/edgar/data/1318605/0001564

https://www.sec.gov/Archives/edgar/data/1318605
https://www.sec.gov/Archives/edgar/data/1318605/000177134020000007/0001771340-20-000007-index-headers.html
https://www.sec.gov/Archives/edgar/data/1318605/000177134020000007/0001771340-20-000007-index.html
https://www.sec.gov/Archives/edgar/data/1318605/000177134020000007/0001771340-20-000007.txt
https://www.sec.gov/Archives/edgar/data/1318605/000177134020000007/edgardoc.xml
Link to Folder: https://www.sec.gov/Archives/edgar/data/1318605/000156459020033069
https://www.sec.gov/Archives/edgar/data/1318605
https://www.sec.gov/Archives/edgar/data/1318605/000156459020033069/0001564590-20-033069-index-headers.html
https://www.sec.gov/Archives/edgar/data/1318605/000156459020033069/0001564590-20-033069-index.html
https://www.sec.gov/Archives/edgar/data/1318605/000156459020033069/0001564590-20-033069.txt
https://www.sec.gov/Archives/edgar/data/1318605/000156459020033069/0001564590-20-033069-xbrl.zip
https://www.sec.gov/Archives/edgar/data/1318605/

https://www.sec.gov/Archives/edgar/data/1318605
https://www.sec.gov/Archives/edgar/data/1318605/000181123020000003/0001811230-20-000003-index-headers.html
https://www.sec.gov/Archives/edgar/data/1318605/000181123020000003/0001811230-20-000003-index.html
https://www.sec.gov/Archives/edgar/data/1318605/000181123020000003/0001811230-20-000003.txt
https://www.sec.gov/Archives/edgar/data/1318605/000181123020000003/edgardoc.xml
Link to Folder: https://www.sec.gov/Archives/edgar/data/1318605/000156459020030017
https://www.sec.gov/Archives/edgar/data/1318605
https://www.sec.gov/Archives/edgar/data/1318605/000156459020030017/0001564590-20-030017-index-headers.html
https://www.sec.gov/Archives/edgar/data/1318605/000156459020030017/0001564590-20-030017-index.html
https://www.sec.gov/Archives/edgar/data/1318605/000156459020030017/0001564590-20-030017.txt
https://www.sec.gov/Archives/edgar/data/1318605/000156459020030017/g2ayf3c3tlub000001.jpg
https://www.sec.gov/Archives/edgar/data/1318605/0001564