In [None]:
# default_exp dailyList

# dailyList

> Parse the SEC's archived daily list of filings.

In [None]:
#hide
%load_ext autoreload
%autoreload 2
from nbdev import show_doc

In [None]:
#export

import csv
import re

from secscan import utils,recentFeed

Download and parse the SEC's archived daily list of filings:

In [None]:
#export

def getQStr(dateStr) :
    """
    Builds the year/quarter path fragment for a date.

    Takes a date string in YYYYMMDD format and returns 'YYYY/QTRn/',
    where n is the calendar quarter (1 to 4) that contains the month.
    """
    year = dateStr[:4]
    month = int(dateStr[4:6])
    # months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4
    quarter = (month + 2) // 3
    return year + '/QTR' + str(quarter) + '/'

def getSecDailyIndexUrl(dateStr, listType='master') :
    """
    Returns the path (no scheme or host) of the daily index file
    for a date given in YYYYMMDD format:
        /Archives/edgar/daily-index/YYYY/QTRn/<listType>.<dateStr>.idx
    listType selects which index file to use and defaults to 'master'.
    """
    quarterDir = getQStr(dateStr)
    idxFileName = listType + '.' + dateStr + '.idx'
    return '/Archives/edgar/daily-index/' + quarterDir + idxFileName

# Matches a standalone 403 or 404 in a string; getDailyFList searches the text
# of a download exception with it to recognise "no index for that date".
noListExceptionPat = re.compile(r'\b40[34]\b')
# Matches the file path field of a daily index entry (case insensitive),
# capturing group 1 = cik and group 2 = accession number.
# utils.accessNoPatStr is presumably a regex string for an accession number
# such as 0000097745-20-000055 - confirm against utils.
edgarTxtFPat = re.compile(
            # gets cik and accession no from a file url within the daily index
            r"\s*edgar/data/(\d+)" # cik - should be same as on the index line
            + r"/("+utils.accessNoPatStr+r")\.txt\s*$", # accession no
            re.IGNORECASE)

def getDailyFList(d) :
    """
    Returns a list of SEC filed forms:
        [(cik, cikName, formType, fileDate, accNo), ... ]
    for the given date or ISO date string, retrieved from
    the SEC daily index.

    Returns an empty list if the download fails with an error whose
    message contains a 403 or 404 (no index for that date, probably
    an SEC holiday). Any other download error is re-raised.

    Prints a message for each index line that is skipped or looks
    inconsistent, and a one-line summary of the number of filings found.
    """
    dateStr = utils.toDateStr(d)
    try :
        fListRes = utils.downloadSecUrl(getSecDailyIndexUrl(dateStr))
    except Exception as e :
        # Pattern.search returns a match object or None, so test its
        # truthiness; comparing the result with an int raises TypeError.
        if noListExceptionPat.search(str(e)) :
            # no list for that date - probably SEC holiday
            return []
        # probably connection error or SEC outage - re-raise unchanged
        raise
    r = csv.reader(fListRes.splitlines(), delimiter='|')
    res = []
    for entry in r :
        if len(entry)==5 and entry[0].isdigit() :
            cik, cikName, formType, fileDate, txtF = entry
        else :
            # lines before the first valid entry (the index header) are
            # skipped silently; later malformed lines are reported
            if len(res) > 0 :
                print('invalid entry', entry)
            continue
        m = edgarTxtFPat.match(txtF)
        if not m :
            print('missing accession no in', entry)
            continue
        if m.group(1) != cik :
            # reported, but the entry is still kept using the cik field
            print('cik mismatch in', entry)
        res.append((cik,cikName,formType,fileDate,m.group(2)))
    print('filings for',dateStr+':', len(res), end=' ')
    return res

Test downloading and parsing the SEC's archived daily list of filings:

In [None]:
# Quarter strings for dates in the first, second and fourth quarters.
assert ([getQStr(dStr) for dStr in ('20200101', '20200401', '20201201')]
        == ['2020/QTR1/', '2020/QTR2/', '2020/QTR4/']), "Quarter string calc"
# Daily index path with the default 'master' list type.
assert (getSecDailyIndexUrl('20201201')
        == '/Archives/edgar/daily-index/2020/QTR4/master.20201201.idx'), "Daily index URL"
# Download and parse the index for 2020-12-01, then check
# the number of filings and the last parsed entry.
r = getDailyFList('20201201')
assert len(r) == 3464
assert r[-1] == ('97745', 'THERMO FISHER SCIENTIFIC INC.',
                 '8-K', '20201201', '0000097745-20-000055')

*** Problem downloading https://www.sec.gov/Archives/edgar/daily-index/2020/QTR4/master.20201201.idx - 403 Client Error: Forbidden for url: https://www.sec.gov/Archives/edgar/daily-index/2020/QTR4/master.20201201.idx ; retrying ...
filings for 20201201: 3464 

Update a trio of dicts
```
    dlMap: dateStr -> list of filings [(cik, formType, accNo, fDate,'*'), ... ]
    cikNameMap: cik -> (name, latestDateStr)
    cikOldNamesMap: cik -> [name1, name2, ...]

```
using the SEC's archived daily list of filings:

In [None]:
def findCikName(cikName,oldNames) :
    """
    Case-insensitive search for a CIK name in a list of old names.
    Returns the index of the first matching entry, or -1 if none matches.
    """
    target = cikName.lower()
    matches = (pos for pos, candidate in enumerate(oldNames)
               if candidate.lower() == target)
    return next(matches, -1)

def updateCikNameMapsFromEntry(dStr, cik, cikName, cikNameMap, cikOldNamesMap) :
    """
    Updates two maps:
        cikNameMap: cik -> (name, latestDateStr)
        cikOldNamesMap: cik -> [oldname1, oldname2, ...]
    to reflect an entry (cik, cikName, ... ) from the index for day dStr.

    Both maps are modified in place; nothing is returned. Date strings
    are compared as plain strings, so they are assumed to share a format
    that sorts chronologically (e.g. YYYYMMDD). Names that differ only
    in case are treated as the same name.
    """
    if cik not in cikNameMap : # completely new name
        cikNameMap[cik] = (cikName, dStr)
        return
    mapName, mapDStr = cikNameMap[cik]
    if dStr < mapDStr :
        # name in cikNameMap is newer than this entry
        oldName, curName = cikName, mapName
    else :
        # this entry is newer than the name in cikNameMap - update the map
        oldName, curName = mapName, cikName
        cikNameMap[cik] = (curName, dStr)
    if oldName.lower() == curName.lower() :
        # old and new names are the same
        return
    # update the old names map:
    if cik not in cikOldNamesMap :
        cikOldNamesMap[cik] = [oldName]
        return
    oldNames = cikOldNamesMap[cik]
    if findCikName(oldName, oldNames) < 0 : # add a new old name
        oldNames.append(oldName)
    i = findCikName(curName, oldNames)
    if i >= 0 : # delete current name if it's in the list of old names
        del oldNames[i]

# def getDLMapBetween(startD, endD,
#                     dlMap=None, cikNameMap=None, cikOldNamesMap=None) :
#     """
#     Updates the trio of maps dlMap, cikNameMap, cikOldNamesMap
#     to reflect the filings for dates between startD (inclusive) and
#     endD (exclusive). If startD is None, uses the last date already
#     in dlMap, or the start of the current year if dlMap is empty.
#     If endD is None, uses today.
#     Retrieves the filings info from the SEC daily index using getDailyFList.
#     """
#     if dlMap is None :
#         dlMap = {}
#     if cikNameMap is None :
#         cikNameMap = {}
#     if cikOldNamesMap is None :
#         cikOldNamesMap = {}
#     if startD is None :
#         if len(dlMap) == 0 :
#             startD = toDateStr()[:4]+'0101' # start of current year
#         else :
#             startD = max(dlMap.keys())
#     for d in reversed(dateStrsBetween(startD,endD)) :
#         if d in dlMap :
#             print 'SKIP'+d,
#         else :
#             print d,
#             dlMap[d] = []
#             l = getDailyFList(d)
#             print '*',
#             for cik, cikName, formType, fileDate, accNo in l :
#                 dlMap[d].append((cik,formType,accNo,
#                                  '*' if fileDate==d else fileDate, '*'))
#                 updateCikNameMapsFromEntry(d,cik,cikName,
#                                            cikNameMap,cikOldNamesMap)
#     return dlMap,cikNameMap,cikOldNamesMap


In [None]:
#hide
# uncomment and run to regenerate all library Python files
# from nbdev.export import notebook2script; notebook2script()