In [None]:
# default_exp scrape13D

# scrape13D

> Scrape holdings information from 13D SEC filings.

In [None]:
#hide
%load_ext autoreload
%autoreload 2
from nbdev import show_doc

In [None]:
#export

import collections
import itertools
import numpy as np
import os
import re

from secscan import utils, dailyList, basicInfo, infoScraper, scrape13F, scrape13G

# Default directory for scraped 13D info: the 'scraped13D' subdirectory of
# utils.stockDataRoot. Used as the default infoDir argument of scraper13D.
default13DDir = os.path.join(utils.stockDataRoot,'scraped13D')

13D scraper class - scrapes holdings information from SEC 13D filings:

In [None]:
#export

class scraper13D(infoScraper.scraperBase) :
    """
    Scraper for 'SC 13D' filings, built on infoScraper.scraperBase.
    Each filing is parsed with scrape13G.parse13GD.
    """
    def __init__(self, infoDir=default13DDir, startD=None, endD=None, fSuff='m.pkl', **pickle_kwargs) :
        """
        Sets up the base scraper for form type 'SC 13D'.

        infoDir, startD, endD, fSuff and any extra pickle_kwargs are handed
        unchanged to infoScraper.scraperBase.__init__.
        """
        super().__init__(
            infoDir,
            'SC 13D',
            startD=startD,
            endD=endD,
            fSuff=fSuff,
            **pickle_kwargs,
        )
    def scrapeInfo(self, accNo, formType=None) :
        """
        Parses the filing with accession number accNo using scrape13G.parse13GD.

        Returns a 2-tuple: the parsed info, followed by None.
        """
        parsedInfo = scrape13G.parse13GD(accNo, formType=formType)
        return parsedInfo, None

Test 13D scraper class:

In [None]:
# Daily-list check: load 20210701-20210704 into an empty daily list and count
# the entries in the first element returned for form type 'SC 13D'.
dl = dailyList.dailyList(startD='empty')
dl.updateForDays('20210701','20210704')
sc13DFilings = dl.getFilingsList(None,'SC 13D')[0]
assert len(sc13DFilings)==84,"testing 13D scraper class (daily list count)"
# Parsing check: parse one known SC 13D/A filing and compare each extracted field
# in turn (same order as a single short-circuiting 'and' chain).
expectedCiks = ['0001165002', '0001461945']
expectedPositions = [('412,925', '5.0'), ('430,214', '5.2'), ('843,139', '10.2'),
                     ('843,139', '10.2'), ('860,416', '10.4'), ('860,416', '10.4')]
parseMsg = "testing 13D scraper class (parsing)"
info = scrape13G.parse13GD('0000921895-21-001173', formType='SC 13D/A')
assert info['ciks']==expectedCiks, parseMsg
assert info['positions']==expectedPositions, parseMsg
assert info['purpose'].startswith('Item 4 is hereby amended and restated to read'), parseMsg
assert info['purpose'].endswith('Item 4 of Schedule 13D. 8 CUSIP No. 961765104'), parseMsg

20210703 WEEKEND 20210702 ### list index 22 filings for 20210702: 6569 * 20210701 filings for 20210701: 5573 * 

Combine 13F, 13G, and 13D filings for a quarter:

In [None]:
#export

def get13GDDatesForQ(y, qNo) :
    """
    Returns the date keyword arguments used for the 13G/13D scrapers for quarter qNo of year y.

    Starts from the kwargs dict returned by scrape13F.getPeriodAndNextQStartEnd(y, qNo)
    and moves the year (first four characters) of its 'startD' entry back by one,
    leaving the rest of the string untouched. The dict is modified in place and returned.
    """
    _period, dateArgs = scrape13F.getPeriodAndNextQStartEnd(y, qNo)
    origStart = dateArgs['startD']
    yearPart, restPart = origStart[:4], origStart[4:]
    # go back one year, keeping the month/day portion as is
    dateArgs['startD'] = str(int(yearPart) - 1) + restPart
    return dateArgs

def getCombNSSForQ(y, qNo, minFrac=0.01, maxFrac=1.0, minStocksPerInv=3, maxStocksPerInv=100,
                   minTop10Frac=0.4, minAUM=None, dtype=np.float64,
                   minInvestorsPerStock=2, maxInvestorsPerStock=None,
                   max13GDBonus=0.2, min13GDBonus=0.02, max13GDCount=100) :
    """
    Builds the investor holdings matrix for quarter qNo of year y.

    The matrix is based on all 13F filings filed during the following quarter,
    combined with 13G and 13D filings from one year earlier up through that
    following quarter (date range from get13GDDatesForQ).

    Returns whatever scrape13F.getNSSForQ returns: mat, ciks, cusips, where mat has
    shape (len(ciks), len(cusips)) and each row gives the fractions held by the
    corresponding cik in each cusip.

    Filters passed through to scrape13F.getNSSForQ:
      minFrac, maxFrac - if supplied, keep only stocks whose fraction of the total
          portfolio is >=minFrac and/or <=maxFrac.
      minStocksPerInv, maxStocksPerInv, minTop10Frac, minAUM - if specified, drop
          investors with too few stocks, too many stocks, too small a fraction in
          their top 10 holdings, or too small a total stock value.
      minInvestorsPerStock, maxInvestorsPerStock - if specified, keep only stocks
          with at least / at most that many investors.
      dtype - passed through unchanged.

    13G/13D bonus options passed to scrape13G.calcBonusMap:
      max13GDBonus, min13GDBonus - bonus fractions are 1.0/#positions, restricted
          to the range [min13GDBonus..max13GDBonus].
      max13GDCount - if not None, only investors with at most this many combined
          13G and 13D positions are used.
    """
    # 13G and 13D scrapers over the same date range
    filingDates = get13GDDatesForQ(y, qNo)
    scraperFor13G = scrape13G.scraper13G(**filingDates)
    scraperFor13D = scraper13D(**filingDates)

    # combined 13G/13D positions per cik, converted to a bonus holdings map
    posMap = scrape13G.updateCik13GDPos([scraperFor13G, scraperFor13D])
    bonusArgs = dict(max13GDBonus=max13GDBonus,
                     min13GDBonus=min13GDBonus,
                     max13GDCount=max13GDCount)
    bonusMap = scrape13G.calcBonusMap(posMap, **bonusArgs)

    # 13F-based matrix, with the bonus map supplied as extra holdings
    nssArgs = dict(minFrac=minFrac, maxFrac=maxFrac,
                   minStocksPerInv=minStocksPerInv, maxStocksPerInv=maxStocksPerInv,
                   minTop10Frac=minTop10Frac, minAUM=minAUM, dtype=dtype,
                   minInvestorsPerStock=minInvestorsPerStock,
                   maxInvestorsPerStock=maxInvestorsPerStock,
                   extraHoldingsMaps=[bonusMap])
    return scrape13F.getNSSForQ(y, qNo, **nssArgs)

In [None]:
# m = getCombNSSForQ(2021, 1, max13GDCount=50)

No event date in 0000814133-20-000050; using 2020-03-31
No event date in 0000814133-20-000049; using 2020-03-31
No event date in 0000814133-20-000051; using 2020-03-31
No CUSIP in 0001193125-20-101055
No event date in 0000071633-20-000016; using 2020-04-01
No event date in 0000071633-20-000017; using 2020-04-01
No event date in 0000071633-20-000018; using 2020-04-01
No event date in 0000071633-20-000019; using 2020-04-01
No event date in 0000071633-20-000020; using 2020-04-01
*** No positions found in 0001104659-20-044338
No event date in 0001140361-20-008343; using 2020-04-01
No event date in 0001140361-20-008347; using 2020-04-01
No CUSIP in 0000834237-20-009334
No CUSIP in 0000834237-20-009337
No CUSIP in 0000834237-20-009361
No CUSIP in 0000834237-20-009336
No CUSIP in 0000834237-20-009354
No CUSIP in 0000834237-20-009381
No CUSIP in 0001422848-20-000110
No CUSIP in 0001422849-20-000133
No event date in 0000866842-20-000004; using 2020-04-02
No event date in 0000318989-20-000068; u

*** No positions found in 0001072613-20-000155
*** ERROR in  0001567619-20-007639
No CUSIP in 0001437749-20-007781
*** No positions found in 0001193125-20-110664
*** No positions found in 0001079973-20-000328
No event date in 0001493152-20-007486; using 2020-04-24
No CUSIP in 0001104659-20-056666
No event date in 0001493152-20-007889; using 2020-04-30
No CUSIP in 0001104659-20-058005
No event date in 0001104659-20-058917; using 2020-05-01
No event date in 0001493152-20-008005; using 2020-05-01
No CUSIP in 0001683168-20-001488
*** No positions found in 0001104659-20-062674
*** No positions found in 0001104659-20-063178
*** No positions found in 0001104659-20-063182
No CUSIP in 0001493152-20-009325
No CUSIP in 0001504304-20-000037
No CUSIP in 0001140361-20-011974
No event date in 0001718111-20-000002; using 2020-05-15
No event date in 0001718111-20-000003; using 2020-05-15
No CUSIP in 0000919574-20-003809
No CUSIP in 0001504304-20-000038
*** No positions found in 0000928464-20-000023
No 

min stock fraction of portfolio 0.01
max stock fraction of portfolio 1.0
min stocks per investor 3
max stocks per investor 100
min fraction of portfolio in top 10 positions 0.4
period 2021-03-31 - total of 5911 ciks, 6039 13F filings
CIK 1731061 1 - 2 [('20210408', '0001104659-21-047884', 829), ('20210506', '0001104659-21-062185', 895)]
CIK 1386929 1 - 2 [('20210412', '0001386929-21-000005', 79), ('20210414', '0001386929-21-000006', 79)]
CIK 1105410 1 - 2 [('20210413', '0001214659-21-004137', 50), ('20210421', '0001214659-21-004362', 50)]
CIK 1566601 1 - 2 [('20210413', '0001566601-21-000005', 764), ('20210414', '0001566601-21-000006', 764)]
CIK 1573767 1 - 2 [('20210413', '0001573767-21-000006', 49), ('20210505', '0001573767-21-000007', 66)]
CIK 1840740 1 - 2 [('20210413', '0001840740-21-000005', 51), ('20210414', '0001840740-21-000006', 51)]
CIK 1053994 1 - 2 [('20210414', '0001085146-21-001185', 184), ('20210419', '0001085146-21-001232', 296)]
CIK 1381055 2 - 3 [('20210416', '000138

CIK 1063296 1 - 2 [('20210517', '0000905718-21-000694', 10), ('20210518', '0000905718-21-000710', 10)]
CIK 1080383 1 - 2 [('20210517', '0000919574-21-003508', 53), ('20210518', '0000919574-21-003778', 55)]
CIK 1086619 1 - 2 [('20210517', '0001567619-21-010327', 3238), ('20210528', '0001567619-21-011331', 3238)]
CIK 1119376 1 - 2 [('20210517', '0001567619-21-010351', 38), ('20210518', '0001567619-21-010595', 40)]
CIK 1164688 1 - 2 [('20210517', '0001172661-21-001214', 16), ('20210521', '0001172661-21-001373', 16)]
CIK 1224962 1 - 2 [('20210517', '0001012975-21-000229', 138), ('20210610', '0001012975-21-000242', 139)]
CIK 1332784 1 - 2 [('20210517', '0001567619-21-010483', 12), ('20210623', '0001567619-21-012524', 12)]
CIK 1387369 1 - 2 [('20210517', '0000919574-21-003641', 39), ('20210517', '0000919574-21-003738', 40)]
CIK 1393818 1 - 2 [('20210517', '0000950123-21-007113', 500), ('20210519', '0000950123-21-007183', 500)]
CIK 1423053 1 - 3 [('20210517', '0000950123-21-007021', 16138), (

In [None]:
# m = scrape13F.getNSSForQ(2021,1)

min stock fraction of portfolio 0.01
max stock fraction of portfolio 1.0
min stocks per investor 3
max stocks per investor 100
min fraction of portfolio in top 10 positions 0.4
period 2021-03-31 - total of 5911 ciks, 6039 13F filings
CIK 1731061 1 - 2 [('20210408', '0001104659-21-047884', 829), ('20210506', '0001104659-21-062185', 895)]
CIK 1386929 1 - 2 [('20210412', '0001386929-21-000005', 79), ('20210414', '0001386929-21-000006', 79)]
CIK 1105410 1 - 2 [('20210413', '0001214659-21-004137', 50), ('20210421', '0001214659-21-004362', 50)]
CIK 1566601 1 - 2 [('20210413', '0001566601-21-000005', 764), ('20210414', '0001566601-21-000006', 764)]
CIK 1573767 1 - 2 [('20210413', '0001573767-21-000006', 49), ('20210505', '0001573767-21-000007', 66)]
CIK 1840740 1 - 2 [('20210413', '0001840740-21-000005', 51), ('20210414', '0001840740-21-000006', 51)]
CIK 1053994 1 - 2 [('20210414', '0001085146-21-001185', 184), ('20210419', '0001085146-21-001232', 296)]
CIK 1381055 2 - 3 [('20210416', '000138

CIK 1119376 1 - 2 [('20210517', '0001567619-21-010351', 38), ('20210518', '0001567619-21-010595', 40)]
CIK 1164688 1 - 2 [('20210517', '0001172661-21-001214', 16), ('20210521', '0001172661-21-001373', 16)]
CIK 1224962 1 - 2 [('20210517', '0001012975-21-000229', 138), ('20210610', '0001012975-21-000242', 139)]
CIK 1332784 1 - 2 [('20210517', '0001567619-21-010483', 12), ('20210623', '0001567619-21-012524', 12)]
CIK 1387369 1 - 2 [('20210517', '0000919574-21-003641', 39), ('20210517', '0000919574-21-003738', 40)]
CIK 1393818 1 - 2 [('20210517', '0000950123-21-007113', 500), ('20210519', '0000950123-21-007183', 500)]
CIK 1423053 1 - 3 [('20210517', '0000950123-21-007021', 16138), ('20210521', '0000950123-21-007261', 16138), ('20210521', '0000950123-21-007263', 3)]
CIK 1595082 0 - 2 [('20210517', '0001595082-21-000039', 453), ('20210527', '0001595082-21-000041', 1)]
CIK 1604350 1 - 2 [('20210517', '0001567619-21-010535', 24), ('20210521', '0001567619-21-010963', 24)]
CIK 1621855 2 - 3 [('2

In [None]:
#hide
# assorted test code: 

# dl = dailyList.dailyList(startD='20200101')

# s = scraper13D(startD='empty')
# s.updateForDays(dl,startD='20210702',endD='20210703')
# print(); s.printCounts()

# dailyList.dlCountFilings(startD='20210726',endD='20210731',formClass='13F-HR',noAmend=False)

# accNo = '0001174947-20-001195'
# i = scrape13G.parse13GD(accNo, formType='SC 13D')
# i

# for accNo,info in s.infoMap['20210702'].items() :
#     print(accNo,info['positions'])

# b = utils.downloadSecUrl('',toFormat='souptext')
# utils.secBrowse('0001104659-21-079401')
# scrape13G.parse13GD('0001171520-19-000289')

In [None]:
#hide
# uncomment and run to regenerate all library Python files
# from nbdev.export import notebook2script; notebook2script()