In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import datetime

In [2]:
# Company identifier to look up; it is passed to get_filings_index() below,
# which sends it to EDGAR as the `CIK` query parameter.
ticker = 'AAPL'

In [3]:
def get_soup(url, params=None, timeout=30, headers=None):
    """Download ``url`` and return its body parsed with BeautifulSoup (lxml).

    Parameters
    ----------
    url : str
        Address to fetch with an HTTP GET.
    params : dict, optional
        Query-string parameters forwarded to ``requests.get``. Defaults to
        ``None`` instead of a shared mutable ``{}``.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` can block indefinitely on an unresponsive host.
    headers : dict, optional
        Extra HTTP headers for the request.
        NOTE(review): SEC EDGAR's fair-access guidance asks automated clients
        to send a descriptive ``User-Agent``; confirm and pass one here if
        requests start being rejected.

    Returns
    -------
    bs4.BeautifulSoup
        Parsed document.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so an error page is
        never parsed as if it were the requested document.
    """
    res = requests.get(url, params=params, timeout=timeout, headers=headers)
    # Fail loudly here rather than later with an opaque parsing error.
    res.raise_for_status()
    return BeautifulSoup(res.content, "lxml")

def parse_filings_list(soup, filing_type):
    """Extract the filings of one exact type from a parsed results page.

    The function looks for ``<table summary="Results">`` in ``soup``, skips
    its first row (treated as the header), and keeps every remaining row
    whose first cell, once stripped, equals ``filing_type``. Rows whose type
    merely contains ``filing_type`` (for example ``10-K/A`` when asking for
    ``10-K``) are therefore excluded.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
        Parsed page; only ``find`` / ``find_all`` / ``.text`` are used.
    filing_type : str
        Exact form type to keep, e.g. ``'10-K'``.

    Returns
    -------
    pandas.DataFrame
        One row per matching filing with string columns ``'Type'`` (first
        cell) and ``'Date'`` (fourth cell), both whitespace-stripped. The two
        columns are always present, even when nothing matched or the results
        table is missing, so callers can index them unconditionally.
    """
    columns = ['Type', 'Date']

    table = soup.find('table', {'summary': 'Results'})
    if table is None:
        # No results table on the page: report "no filings" instead of
        # crashing with AttributeError on None.
        return pd.DataFrame(columns=columns)

    filings = []
    for row in table.find_all('tr')[1:]:  # [1:] skips the header row
        cells = row.find_all('td')
        if len(cells) < 4:
            # Malformed / spacer row: the date lives in the 4th cell.
            continue
        row_type = cells[0].text.strip()
        if row_type != filing_type:
            continue
        # Store the stripped values so the stored type matches the value
        # that was actually compared above.
        filings.append({'Type': row_type, 'Date': cells[3].text.strip()})

    return pd.DataFrame(filings, columns=columns)

def get_filings_index(cik, filing_types=('10-K', '10-Q', '8-K'), count=40):
    """Build a date-indexed 0/1 table of which filing types were filed when.

    For each entry in ``filing_types`` the EDGAR company-browse page is
    fetched with ``get_soup`` and reduced to matching rows with
    ``parse_filings_list``; the per-type results are then combined and
    pivoted.

    Parameters
    ----------
    cik : str
        Value sent as the ``CIK`` query parameter (the notebook passes a
        ticker symbol here).
    filing_types : iterable of str, optional
        Form types to query, one request each. Defaults to the original
        hard-coded ``('10-K', '10-Q', '8-K')``.
    count : int or str, optional
        Value sent as the ``count`` query parameter. Defaults to 40.

    Returns
    -------
    pandas.DataFrame
        Index ``'Date'`` (datetime, ascending), one column per filing type
        that actually occurred, values ``1`` where at least one filing of
        that type exists on that date and ``0`` otherwise. An empty frame
        with the same index/column names is returned when no filings were
        found at all.
    """
    sec_url = 'https://www.sec.gov/cgi-bin/browse-edgar'

    base_params = {
        'action': 'getcompany',
        'CIK': cik,
        'owner': 'exclude',
        'count': str(count),
    }

    frames = []
    for filing_type in filing_types:
        # Fresh dict per request so the shared base parameters are never mutated.
        soup = get_soup(sec_url, {**base_params, 'type': filing_type})
        filings_df = parse_filings_list(soup, filing_type)
        if not filings_df.empty:
            frames.append(filings_df)

    if not frames:
        # Nothing matched for any type: return an empty, well-formed table
        # instead of failing on the missing 'Date' column.
        return pd.DataFrame(
            index=pd.DatetimeIndex([], name='Date'),
            columns=pd.Index([], name='Type'),
        )

    combined_df = pd.concat(frames, ignore_index=True)
    combined_df['Date'] = pd.to_datetime(combined_df['Date'])
    combined_df['Type'] = combined_df['Type'].str.strip()

    # Several filings of the same type can share a date; DataFrame.pivot
    # raises ValueError on duplicate (index, column) pairs, so collapse them
    # first. The output is a presence flag, so nothing is lost.
    combined_df = (
        combined_df
        .drop_duplicates(subset=['Date', 'Type'])
        .assign(Value=1)
    )

    df = combined_df.pivot(index='Date', columns='Type', values='Value')
    return df.sort_index().fillna(0)

In [5]:
# Query EDGAR for the configured ticker; this issues one HTTP request per
# filing type inside get_filings_index().
filings = get_filings_index(ticker)

# Preview the first rows of the resulting table.
filings.head()

In [6]:
# Write the table to filings.csv (relative path, so it is created in the
# notebook's current working directory).
filings.to_csv('filings.csv')