In [None]:
# default_exp scrape13G

# scrape13G

> Scrape holdings information from 13G SEC filings.

In [None]:
#hide
%load_ext autoreload
%autoreload 2
from nbdev import show_doc

In [None]:
#export

import collections
import itertools
import os
import re

from secscan import utils, dailyList, basicInfo, infoScraper

# Default storage directory for scraper13G: the 'scraped13G' folder under utils.stockDataRoot.
default13GDir = os.path.join(utils.stockDataRoot,'scraped13G')

13G scraper class - scrape holdings information from the SEC filing:

In [None]:
#export

# Share count: digits with optional 3-digit groups separated by ',' or '.' (e.g. 1,350,552).
nSharesPatStr = r'(\d+(?:[,.]\d\d\d)*)'
# Percentage: '4', '4.', '4.36' or '.36'.
nPctPatStr = r'(\d+(?:\.\d*)?|\.\d+)'
# Patterns are tried in this order; the first one producing any match is used.
form13NshAndPctPats = [
    # "aggregate amount ... <shares> ... percent of class ... <pct> %"
    re.compile(r'aggregate\s+amount.{1,100}?' + nSharesPatStr
                + r'.{1,200}?' + r'percent\s+of\s+class.{1,100}?' + nPctPatStr + r'\s*%',
                re.IGNORECASE|re.DOTALL),
    # "item 9: ... <shares> ... item 11: ... <pct> %"
    re.compile(r'item\s+9\s*:.*?' + nSharesPatStr
                + r'.*?' + r'item\s+11\s*:.*?' + nPctPatStr + r'\s*%',
                re.IGNORECASE|re.DOTALL),
    # Fallback for text without a '%' sign after the percentage. The (?!\D9\D)\D part
    # refuses to treat an isolated 9 as the percentage (presumably the "row 9"
    # reference in the item heading - confirm against real filings).
    # Uses \s+ between "of" and "class" (like the first pattern) so that a line break
    # or repeated whitespace there does not defeat the match.
    re.compile(r'aggregate\s+amount.{1,100}?' + nSharesPatStr
                + r'.{1,200}?' + r'percent\s+of\s+class.{1,100}?(?!\D9\D)\D' + nPctPatStr,
                re.IGNORECASE|re.DOTALL),
]
def getSec13NshAndPctFromText(txt) :
    """
    Returns a list [(nShares, percent) ... ] parsed from form 13G or 13D.

    txt is the filing text. Each pattern in form13NshAndPctPats is tried in order
    and the matches of the first pattern that finds anything are returned.
    Both tuple elements are strings exactly as they appear in the text
    (separators inside the share count are kept).
    Returns [] if nothing matches or if txt is empty or None.
    """
    if not txt :
        # nothing to search; avoids a TypeError from findall on None
        return []
    for pat in form13NshAndPctPats :
        res = pat.findall(txt)
        if res :
            return res
    return []

def parse13G(accNo, formType=None) :
    """
    Return the info dict for one filing, with parsed holdings added under 'positions'.

    The dict comes from basicInfo.getSecFormInfo(accNo, formType=formType).
    The document at the first entry of info['links'] is downloaded (element 3 of
    that entry is used as the URL) and run through getSec13NshAndPctFromText;
    the result is stored in info['positions']. When the links list is empty,
    'positions' is set to []. A message is printed if there are no links or no
    positions were found.
    """
    info = basicInfo.getSecFormInfo(accNo, formType=formType)
    links = info['links']
    if len(links) > 0 :
        mainUrl = links[0][3]
        # .txt documents are fetched as plain text, everything else as 'souptext'
        downloadFormat = 'souptext'
        if mainUrl.endswith('.txt') :
            downloadFormat = 'text'
        mainText = utils.downloadSecUrl(mainUrl, toFormat=downloadFormat)
        positions = getSec13NshAndPctFromText(mainText)
    else :
        print('NO LINKS LIST!')
        positions = []
    info['positions'] = positions
    if not len(positions) :
        print('no positions found!')
    return info

class scraper13G(infoScraper.scraperBase) :
    """Scraper for 'SC 13G' filings, built on infoScraper.scraperBase; parsing is done by parse13G."""
    def __init__(self, infoDir=default13GDir, startD=None, endD=None, fSuff='m.pkl', **pickle_kwargs) :
        """Forward all options to the base class, passing 'SC 13G' as its second positional argument."""
        super().__init__(
            infoDir, 'SC 13G',
            startD=startD, endD=endD, fSuff=fSuff,
            **pickle_kwargs
        )
    def scrapeInfo(self, accNo, formType=None) :
        """Return the pair (parse13G result for accNo, None)."""
        info = parse13G(accNo, formType)
        return info, None

Test the daily list filing count and 13G parsing:

In [None]:
# Build a fresh daily list for the requested days and check how many SC 13G filings it holds.
dl = dailyList.dailyList(startD='empty')
dl.updateForDays('20210701','20210704')
assert len(dl.getFilingsList(None,'SC 13G')[0])==100,"testing 13G scraper class (daily list count)"

# Parse one known filing and compare against the expected CIKs and (shares, percent) pairs.
expectedCiks = ['0000016099', '0001373604']
expectedPositions = [('1350552', '4.36'), ('1582235', '5.10')]
info = parse13G('0001567619-21-013814', formType='SC 13G')
assert info['ciks']==expectedCiks,"testing 13G scraper class (parsing)"
assert info['positions']==expectedPositions,"testing 13G scraper class (parsing)"

20210703 WEEKEND 20210702 ### list index 21 filings for 20210702: 6569 * 20210701 filings for 20210701: 5573 * 

In [None]:
#hide
# uncomment and run to regenerate all library Python files
# from nbdev.export import notebook2script; notebook2script()