## Scraping Financial Data from the SEC

There's a ton of financial data, but it can be a pain to access. Most people would not manually click through SEC filings.

In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
import json

## tickers get added all the time, the SEC provides this resource to map CIK numbers to tickers
## honestly, I couldn't find a json version so for the sake of lookup runtime, we can map all public companies here.

def update_ticker_to_CIK():
    
    url = r"https://www.sec.gov/include/ticker.txt"
    r = requests.get(url)
    CIK_map = dict(item.split('\t') for item in r.text.split('\n'))
    
    with open('financial_data/CIK_map.json', 'w', encoding='utf-8') as f:
        json.dump(CIK_map, f, ensure_ascii=False, indent=4)
    
    return(CIK_map)
        
CIK_map = update_ticker_to_CIK()

In [3]:
def search_ticker(tick):
    
    # define the base url needed to create the file url.
    base_url = r"https://www.sec.gov"

    # convert a normal url to a document url
    normal_url = r"https://www.sec.gov/Archives/edgar/data/{}".format(CIK_map[tick])
    content = requests.get(normal_url)
    soup = BeautifulSoup(content.text, 'html.parser')
    
    for link in soup.find_all('a', href=True):
        print(link['href'])

In [4]:
## Example
search_ticker('tsla')

/index.htm
/search/search.htm
#main-content
/about.shtml
/about/whatwedo.shtml
/about/commissioner.shtml
/about/laws.shtml
/about/secreports.shtml
/jobs.shtml
/contact.shtml
/divisions.shtml
/divisions/corpfin.shtml
/divisions/enforce.shtml
/divisions/investment.shtml
/divisions/riskfin.shtml
/divisions/marketreg.shtml
/about/offices/ocie.shtml
/divisions.shtml
/litigation.shtml
/litigation/litreleases.shtml
/litigation/admin.shtml
/litigation/opinions.shtml
/divisions/enforce/friactions.shtml
/litigation/suspensions.shtml
/news/newsroom/howinvestigationswork.html
/alj.shtml
/rules.shtml
/rules/proposed.shtml
/rules/final.shtml
/rules/interim-final-temp.shtml
/rules/other.shtml
/rules/sro.shtml
/interps.shtml
/investor.shtml
http://investor.gov/
/investor/brokers.htm
/investor/alerts.shtml
/answers.shtml
/complaint/select.shtml
/investor/pubs.shtml
/edgar.shtml
/edgar/quickedgar.htm
/edgar/searchedgar/companysearch.html
/about/forms/secforms.htm
/answers/publicdocs.htm
/news/newsroom/i