In [None]:
# default_exp recentFeed

# recentFeed

> Parse the SEC's recent filings feed.

In [None]:
#hide
%load_ext autoreload
%autoreload 2
from nbdev import show_doc

In [None]:
#export

import re
import xml.etree.cElementTree as cElTree

from secscan import utils

In [None]:
#export

def secMostRecentListUrl(count=100) :
    """
    Builds the URL (path plus query string, no host part) of the SEC's
    atom-format feed of most recent filings.

    If count is None the count parameter is left out of the query string;
    otherwise it is included as count=<count>.
    """
    if count is None :
        countParam = ''
    else :
        countParam = f'count={count}&'
    return '/cgi-bin/browse-edgar?' + countParam + 'action=getcurrent&output=atom'

def printXmlParseWarning(msg,el) :
    """
    Prints a three-line warning to stdout: a banner line containing msg,
    the serialized form of the XML element el (as returned by
    cElTree.tostring), and a closing line of asterisks.
    """
    bannerParts = ('***', msg, '***')
    print(*bannerParts)
    # Serialize only after the banner is out, so the banner still appears
    # even if serializing the element fails.
    serializedEl = cElTree.tostring(el)
    print(serializedEl)
    print('************************')

# Pattern for the text of a feed entry's title element. Its three groups are
# unpacked by getRecentChunk as (formType, cikName, cik):
#   - group 1: shortest run of text up to whitespace followed by '-'
#   - group 2: shortest run of text after that '-' and whitespace, up to
#     the parenthesized number
#   - group 3: exactly ten digits enclosed in parentheses
titlePat = re.compile(
        r"\s*(.+?)\s+-" # formType, ignoring surrounding whitespace
        + r"\s+(.+?)\s*" # cikName, ignoring surrounding whitespace
        + r"\((\d{10})\)") # cik
# Case-insensitive pattern for the text of a feed entry's summary element.
# Its two groups are unpacked by getRecentChunk as (fDate, accNo):
#   - group 1: after 'filed', some non-digit characters and a whitespace
#     character: a date of 4+2+2 digits, optionally separated by '-' or '/',
#     followed by whitespace
#   - group 2: after 'accno', some non-digit characters and a whitespace
#     character: a run of digits and hyphens that starts and ends with a
#     digit, followed by whitespace
# Note that '.' does not match newlines here (no re.DOTALL), so 'filed' and
# 'accno' must be on the same line of the text for the pattern to match.
filedPat = re.compile(
        r"filed\D+?\s(\d\d\d\d[-/]?\d\d[-/]?\d\d)\s.*"
        + r"accno\D+?\s(\d[-\d]+\d)\s",
        re.IGNORECASE)
def getRecentChunk(count=100) :
    """
    Downloads and parses the SEC's atom-format feed of most recent filings.

    count is passed through to secMostRecentListUrl to build the feed URL.

    Returns a list of tuples of strings:
        [(fileDate, cik, formType, accNo, cikName),
         ... ]
    in the order the entries appear in the feed (expected to be most recent
    filings first). The cik has its leading zeros stripped.

    An entry is left out of the result if any of its fields is missing or
    empty; if its title or summary element is present but cannot be parsed,
    a warning is printed via printXmlParseWarning.
    """
    mrListXml = cElTree.fromstring(utils.downloadSecUrl(secMostRecentListUrl(count=count)))
    res = []
    for listEntry in mrListXml :
        # endswith rather than == : presumably so that a namespace prefix
        # on the tag name is ignored.
        if not listEntry.tag.lower().endswith("entry") :
            continue
        cik = formType = accNo = fDate = cikName = None
        for entryItem in listEntry :
            itemTag = entryItem.tag.lower()
            # An empty element has text None; use '' instead so that it is
            # reported as a parse failure below rather than raising TypeError.
            itemText = entryItem.text or ''
            if itemTag.endswith('title') :
                m = titlePat.match(itemText)
                if m is None :
                    printXmlParseWarning('unable to parse title element',listEntry)
                    continue
                formType,cikName,cik = m.groups()
                cik = cik.lstrip('0')
            elif itemTag.endswith('summary') :
                m = filedPat.search(itemText)
                if m is None :
                    printXmlParseWarning('unable to parse summary element',listEntry)
                    continue
                fDate,accNo = m.groups()
        fTup = (fDate, cik, formType, accNo, cikName)
        # Keep only complete entries: every field must be present and non-empty.
        if all(fTup) :
            res.append(fTup)
    return res

Test downloading and parsing the recent filings feed:

In [None]:
# Request the default-sized chunk explicitly so that the requested count and
# the length asserted below visibly agree, then show a sample of the entries.
l = getRecentChunk(count=100)
utils.printSamp(l, 5)
assert 100 == len(l)

0 ('1787088', '4', '0000899243-21-022205', '2021-06-04', 'Wellington Hadley Harbor Master Investors (Cayman) III L.P.')
1 ('1431695', '4', '0000899243-21-022205', '2021-06-04', 'Olo Inc.')
2 ('1797168', '4', '0001437749-21-014199', '2021-06-04', 'Acuitas Group Holdings, LLC')
3 ('1136174', '4', '0001437749-21-014199', '2021-06-04', 'Ontrak, Inc.')
4 ('904534', '4', '0001437749-21-014199', '2021-06-04', 'PEIZER TERREN S')


In [None]:
# uncomment and run to regenerate all library Python files
# from nbdev.export import notebook2script; notebook2script()