In [None]:
# default_exp scrape13F

# scrape13F

> Scrape investor holdings from 13F-HR SEC filings.

In [None]:
#hide
%load_ext autoreload
%autoreload 2
from nbdev import show_doc

In [None]:
#export

import collections
import itertools
import numpy as np
import os
import re

from secscan import utils, dailyList, basicInfo, infoScraper

default13FDir = os.path.join(utils.stockDataRoot,'scraped13F')

13F-HR scraper class - scrape table of investor holdings from XML format in the SEC filing:

In [None]:
#export

def findChildEndingWith(el,tagEnd) :
    """
    Finds first child of an XML element with tag ending in tagEnd (case insensitive).

    Returns the matching child element, or None if no child matches.
    Children whose tag is not a string are skipped - ElementTree-style parsers
    represent comments and processing instructions with a callable tag, which
    would otherwise make the tag comparison fail with an AttributeError.
    """
    tagEnd = tagEnd.lower()
    for child in el :
        tag = child.tag
        # only real elements have string tags; ignore comment/PI nodes
        if isinstance(tag, str) and tag.lower().endswith(tagEnd) :
            return child
    return None

def findChildSeries(el,tagEnds) :
    """
    Finds a nested series of children by tag using findChildEndingWith.

    Each entry of tagEnds selects a child of the element found by the previous
    entry. Returns the element found for the last entry, or None as soon as
    any step of the series has no match (or if el itself is None).
    """
    for tagEnd in tagEnds :
        if el is None :
            # a previous step found nothing - searching inside None would raise TypeError
            return None
        el = findChildEndingWith(el,tagEnd)
    return el

# Case-insensitive patterns matching "call opt..." / "put opt..." (optional whitespace
# between the words); getRowInfo searches the issuer name with these to recognize
# options when a holdings row has no putCall element.
callOptPat = re.compile(r'call\s*opt',re.IGNORECASE)
putOptPat = re.compile(r'put\s*opt',re.IGNORECASE)

def getRowInfo(row) :
    """
    Returns information for a row in a 13F table in the form:
        (cusip, name, value, title, count, putCall)
    where the field values are as given in the table (stripped of surrounding
    whitespace, with cusip and title upper-cased), except putCall is
    'CALL', 'PUT', or ''.

    putCall is taken from the row's putCall element if there is one (looked up
    first directly under the row, then under the shrsOrPrnAmt element).
    If there is no such element, or the element has no text at all, putCall is
    inferred from the issuer name and the title of class, defaulting to ''.
    """
    cusip = findChildEndingWith(row,'cusip').text.upper().strip()
    name = findChildEndingWith(row,'issuer').text.strip()
    value = findChildEndingWith(row,'value').text.strip()
    title = findChildEndingWith(row,'titleOfClass').text.upper().strip()
    shrsOrPrnEl = findChildEndingWith(row,'shrsOrPrnAmt')
    count = findChildEndingWith(shrsOrPrnEl,'sshPrnamt').text.strip()
    putCallEl = findChildEndingWith(row,'putCall')
    if putCallEl is None :
        # some filings nest the putCall element inside shrsOrPrnAmt
        putCallEl = findChildEndingWith(shrsOrPrnEl,'putCall')
    # An empty element such as <putCall/> has text None; treat it like a missing
    # element rather than failing on None.upper().
    putCallText = putCallEl.text if putCallEl is not None else None
    if putCallText is not None :
        putCall = putCallText.upper().strip()
    elif callOptPat.search(name) or title.startswith('CALL') or title=='CAL' :
        putCall = 'CALL'
    elif putOptPat.search(name) or title.startswith('PUT') :
        putCall = 'PUT'
    else :
        putCall = ''
    return (cusip, name, value, title, count, putCall)

def parse13FHoldings(accNo, formType=None) :
    """
    Parses a 13F filing, returning the result in the form:
    {
        'period': 'YYYY-MM-DD',
        'acceptDate': 'YYYY-MM-DD',
        'acceptTime': 'HH:MM:SS',
        'cik' : 'DDDDDDDDDD',
        'holdings': [(cusip, name, value, title, count, putCall), ... ]
    }
    where the field values are as given in the table,
    except putCall is 'CALL', 'PUT', or ''.

    If the filing has exactly one XML link, that document is read as the summary
    only: a message is printed based on its table entry total and the holdings
    list is empty. Otherwise the last XML link is read as the holdings table.
    """
    info = basicInfo.getSecFormInfo(accNo, formType)
    xmlUrls = []
    for link in info['links'] :
        if link[0].lower().endswith('xml') :
            xmlUrls.append(link[-1])
    holdings = []
    if len(xmlUrls) != 1 :
        # the holdings table is the last XML document linked from the filing
        xmlTab = utils.downloadSecUrl(xmlUrls[-1],toFormat='xml')
        tabRows = list(filter(lambda child : child.tag.lower().endswith('infotable'), xmlTab))
        nChildren, nRows = len(xmlTab), len(tabRows)
        if nChildren != nRows :
            print('*** #rows mismatch',nChildren,'all children',nRows,'table rows')
        if nRows == 0 :
            print('*** no holdings in table')
        holdings = list(map(getRowInfo, tabRows))
    else :
        # only one XML document - no holdings table; check the total in the summary page
        xmlSummTab = utils.downloadSecUrl(xmlUrls[0],toFormat='xml')
        totEl = findChildSeries(xmlSummTab,['formdata','summarypage','tableentrytotal'])
        if int(totEl.text.strip()) == 0 :
            print('*** zero total, table not present')
        else :
            print('*** nonzero total, but table not present')
    ciks = info['ciks']
    if len(ciks) != 1 :
        print('*** unexpected number of CIKs!=1',ciks)
    res = {}
    for key in ('period', 'acceptDate', 'acceptTime') :
        res[key] = info[key]
    res['cik'] = ciks[0]
    res['holdings'] = holdings
    return res

class scraper13F(infoScraper.scraperBase) :
    """
    Scraper for 13F-HR filings, built on infoScraper.scraperBase.
    The info scraped for each filing is the dict returned by parse13FHoldings.
    """
    def __init__(self, infoDir=default13FDir, startD=None, endD=None, fSuff='m.pkl', **pickle_kwargs) :
        # Everything is forwarded to the base class, with '13F-HR' fixed as its second
        # positional argument (presumably the form class to scrape - see scraperBase).
        super().__init__(infoDir, '13F-HR', startD=startD, endD=endD, fSuff=fSuff, **pickle_kwargs)
    def scrapeInfo(self, accNo, formType=None) :
        # Returns a pair: the parsed holdings info and None. The meaning of the
        # second element is defined by the base class - NOTE(review): confirm there.
        return parse13FHoldings(accNo, formType), None

Test 13F-HR scraper class:

In [None]:
# Start from an empty daily list and update it for the given days,
# then check the number of 13F-HR filings it reports.
dl = dailyList.dailyList(startD='empty')
dl.updateForDays('20210614','20210615')
assert len(dl.getFilingsList(None,'13F-HR')[0])==4, "testing 13F scraper class (daily list count)"

# Start from an empty scraper, scrape the filings in the daily list, and spot-check
# the parsed holdings of one filing (row count plus first and last rows).
s = scraper13F(startD='empty')
s.updateForDays(dl,'20210614','20210615')
hList = s.infoMap['20210614']['0001104659-21-080656']['holdings']
assert (len(hList)==39
        and hList[0]==('00289Y107', 'Abeona Therapeutics Inc', '5376', 'COM', '2859767', '')
        and hList[-1]==('82835P103', 'SILVERCORP METALS INC', '179', 'COM', '36500', '')
       ), "testing 13F scraper class (parsing holdings table)"



Generate a combined holdings matrix based on all 13F-HR filings in a quarter:

In [None]:
#export

def condenseHoldings(holdings, minFrac=0.0, maxFrac=1.0,
                     pctFormat=False, includeName=False, cusipNames={},
                     minStocksPerInv=None, maxStocksPerInv=None, minTop10Frac=None, minAUM=None,
                     allCusipCounter=None, allHoldingsMap=None, forCik=None) :
    """
    Converts a list of stock and option holdings as parsed from the 13F:
        [(cusip, name, value, title, count, putCall), ... ]
    that may have multiple entries per stock into a condensed list that omits
    call/put options and only has one combined entry per stock:
        [(cusip, val, frac) ... ]
    sorted in descending order by value, and restricted to stocks with fraction
    of total portfolio in [minFrac..maxFrac]

    If pctFormat is True, frac is returned as a string in the format N.NN%
    If includeName is True, the cusip name is also returned:
        [(cusip, name, val, frac) ... ]
    with the name looked up in cusipNames, falling back to a name from the holdings list.

    If minStocksPerInv, maxStocksPerInv, minTop10Frac or minAUM are specified, returns None
    for lists with too few stocks, too many stocks, too small a fraction in the
    top 10 stocks, or too small a total value (minAUM is compared against 1000 times
    the summed values). Also returns None if no holdings remain after filtering.

    If supplied, allCusipCounter should be a Counter, and it will be updated to count
    all investors that have any position in each stock, without regard to the min/max
    options supplied to restrict the holdings list.

    If supplied, allHoldingsMap should be a dict, and a full sorted holdings list:
        [(cusip, val, frac) ... ]
    will be saved in allHoldingsMap[forCik], without regard to the min/max
    options supplied to restrict the holdings list.
    """
    if includeName :
        cusipToName = dict((cusip,name)
                           for cusip, name, value, shType, nShares, putCall in holdings)
    # eliminate options and sort to group holdings by CUSIP (stock identifier):
    holdings = sorted((cusip, float(value))
                      for cusip, name, value, shType, nShares, putCall in holdings
                      if putCall=='')
    # combine into a single entry for each stock with the total for that stock:
    holdings = [(cusip, sum(val for _,val in it))
                for cusip,it in itertools.groupby(holdings, key=lambda x : x[0])]
    # sort to put largest holdings first:
    holdings.sort(key = lambda x : x[1], reverse=True)
    # calculate the fraction of total holdings for each stock:
    totAum = sum(val for _,val in holdings)
    holdings = [(cusip, val, val/totAum if totAum>0.0 else 0.0)
                for cusip,val in holdings]
    if allHoldingsMap is not None :
        allHoldingsMap[forCik] = holdings
    if allCusipCounter is not None :
        # holdings entries are (cusip, val, frac) triples at this point, so unpack
        # all three fields (a two-name unpack here raises ValueError)
        allCusipCounter.update(cusip for cusip,_,_ in holdings)
    # return None if the investor is eliminated by any of the min/max options:
    if ((minStocksPerInv is not None and minStocksPerInv > len(holdings))
            or (maxStocksPerInv is not None and maxStocksPerInv < len(holdings))
            or (minAUM is not None and minAUM > totAum*1000.0)
            or (minTop10Frac is not None
                and minTop10Frac > sum(frac for _,_,frac in holdings[:10]))) :
        return None
    # get the final output list, filtered by min/maxFrac and formatted:
    res = []
    for cusip,val,frac in holdings :
        if frac > maxFrac : # fraction too large - skip holding
            continue
        if minFrac > frac : # fraction too small - holdings list is sorted so we can stop here
            break
        fracOut = f'{frac:.2%}' if pctFormat else frac
        if includeName :
            res.append((cusip, cusipNames.get(cusip,cusipToName[cusip]), val, fracOut))
        else :
            res.append((cusip, val, fracOut))
    return res if len(res)>0 else None
#     if allCusipCounter is not None :
#         allCusipCounter.update(cusip for cusip,_ in holdings)
#     if ((minStocksPerInv is not None and minStocksPerInv > len(holdings))
#             or (maxStocksPerInv is not None and maxStocksPerInv < len(holdings))) :
#         return None
#     holdings.sort(key = lambda x : x[1], reverse=True)
#     tot = sum(val for _,val in holdings)
#     if ((minAUM is not None and minAUM > tot*1000.0)
#             or (minTop10Frac is not None and minTop10Frac*tot > sum(val for _,val in holdings[:10]))) :
#         return None
#     res = []
#     for cusip,val in holdings :
#         frac = val/tot if tot>0.0 else 0.0
#         if frac > maxFrac :
#             # skip holdings with fraction too large
#             continue
#         if minFrac > frac :
#             # holdings list is sorted in descending order by fraction, so we can stop here 
#             break
#         fracOut = f'{frac:.2%}' if pctFormat else frac
#         if includeName :
#             res.append((cusip, cusipNames.get(cusip,cusipToName[cusip]), val, fracOut))
#         else :
#             res.append((cusip, val, fracOut))
#     return res

def get13FAmendmentType(accNo, formType=None) :
    """
    Gets the amendment type for a 13F-HR/A filing - may be RESTATEMENT or NEW HOLDINGS.
    Returns None if the cover page does not flag the filing as an amendment.

    This turned out to be unreliable (often missing or wrong), so it is not used to get
    the combined holdings for an investor. Instead the number of holdings in an
    amendment is compared to the previous filing, and the amendment is treated as a
    restatement if the new number of holdings is more than half the old number.
    """
    info = basicInfo.getSecFormInfo(accNo, formType)
    xmlUrls = [link[-1] for link in info['links'] if link[0].lower().endswith('xml')]
    # the cover page is in the first XML document linked from the filing
    coverPage = findChildSeries(utils.downloadSecUrl(xmlUrls[0],toFormat='xml'),
                                ['formdata','coverpage'])
    amendFlagEl = findChildEndingWith(coverPage,'isamendment')
    if amendFlagEl is not None and amendFlagEl.text.strip().lower() in ['true','yes'] :
        amendTypeEl = findChildSeries(coverPage,['amendmentinfo','amendmenttype'])
        return amendTypeEl.text.strip()
    return None

def indexMap(lis) :
    "Builds a dict mapping each item of a list to its index in the list (last index for repeated items)."
    return {item: pos for pos, item in enumerate(lis)}

def getHoldingsMap(scraped13F, period, minFrac=0.0, maxFrac=1.0,
                   minStocksPerInv=None, maxStocksPerInv=None, minTop10Frac=None, minAUM=None,
                   allCusipCounter=None, allHoldingsMap=None) :
    """
    Consolidates the holdings for each CIK, based on all filings in scraped13F.infoMap
    for the given period, into a combined map of investor holdings.

    Returns a dict: cik -> {cusip -> frac}, with leading zeros stripped from each cik.

    Only stocks are included (no call/put options).

    minFrac and/or maxFrac restrict the result to stocks with fraction of
    total portfolio >=minFrac and/or <=maxFrac.

    minStocksPerInv, maxStocksPerInv, minTop10Frac and minAUM, when specified, omit
    investors with too few stocks, too many stocks, too small a fraction in the
    top 10 stocks, or too small a total stock value.

    allCusipCounter, if supplied, should be a Counter; it is updated to count
    all investors that have any position in each stock, regardless of the min/max
    options restricting the returned holdings map.

    allHoldingsMap, if supplied, should be a dict; it is updated with a full sorted
    holdings list for each CIK:
        allHoldingsMap[cik] = [(cusip, val, frac) ... ]
    regardless of the min/max options restricting the returned holdings map.
    """
    # Report the restriction options that are in effect.
    optionMsgs = (('min stock fraction of portfolio', minFrac),
                  ('max stock fraction of portfolio', maxFrac),
                  ('min stocks per investor', minStocksPerInv),
                  ('max stocks per investor', maxStocksPerInv),
                  ('min fraction of portfolio in top 10 positions', minTop10Frac),
                  ('min AUM (total portfolio value)', minAUM))
    for msg, optVal in optionMsgs :
        if optVal is not None :
            print(msg,optVal)
    #
    # Group the filings for the requested period by cik:
    #     cik -> [(dateStr, accNo, holdingsList) ... ]
    filingsByCik = collections.defaultdict(list)
    nFilings = 0
    for dStr, accNoToInfo in scraped13F.infoMap.items() :
        for accNo, info in accNoToInfo.items() :
            if info == 'ERROR' :
                print('ERR',accNo)
                continue
            if info['period'] != period :
                continue
            filingsByCik[info['cik'].lstrip('0')].append((dStr, accNo, info['holdings']))
            nFilings += 1
    print('period',period,'- total of',len(filingsByCik),'ciks,',nFilings,'13F filings')
    #
    # Build one consolidated position list per cik. When a cik has several filings
    # the later ones are amendments; the filed amendment type is unreliable, so a rule
    # of thumb is used: an amendment with more than half as many positions as the
    # current base filing is taken to be a restatement and becomes the new base,
    # and every filing after the final base has its positions added to it.
    res = {}
    for cik, filings in filingsByCik.items() :
        filings.sort()  # by day, then by accession number
        nForCik = len(filings)
        base = 0
        for k in range(1, nForCik) :
            if len(filings[k][2]) > len(filings[base][2])//2 :
                base = k
        if nForCik != 1 :
            print('CIK',cik,base,'-',nForCik,[(dStr,accNo,len(holdings))
                                              for dStr,accNo,holdings in filings])
        combHoldings = filings[base][2]
        for _, _, extraHoldings in filings[base+1:] :
            combHoldings = combHoldings + extraHoldings
        posList = condenseHoldings(combHoldings, minFrac=minFrac, maxFrac=maxFrac,
                                   minStocksPerInv=minStocksPerInv,
                                   maxStocksPerInv=maxStocksPerInv,
                                   minTop10Frac=minTop10Frac, minAUM=minAUM,
                                   allCusipCounter=allCusipCounter,
                                   allHoldingsMap=allHoldingsMap, forCik=cik)
        if posList is None :
            continue
        res[cik] = {cusip: frac for cusip,_,frac in posList}
    return res

def addHoldingsMap(holdingsMap, extraHoldingsMap) :
    """
    Merges the positions in extraHoldingsMap into holdingsMap, in place.
    Both arguments are dicts of the form: cik -> {cusip -> frac}
    Fractions for a (cik, cusip) pair present in both maps are summed; ciks and
    cusips that only appear in extraHoldingsMap are added to holdingsMap.
    """
    for cik, extraPosMap in extraHoldingsMap.items() :
        # create the investor's position map on first sight
        posMap = holdingsMap.setdefault(cik, {})
        for cusip, frac in extraPosMap.items() :
            prevFrac = posMap.get(cusip,0.0)
            posMap[cusip] = prevFrac + frac

def printRemoveStocksMessage(cusipsToRemove, delCount, msg) :
    """
    Prints msg along with the number of stocks added to cusipsToRemove since the
    previous count delCount, and returns the current size of cusipsToRemove
    (to be used as the next delCount).
    """
    nowRemoved = len(cusipsToRemove)
    print(msg,'- removed',nowRemoved-delCount,'stocks')
    return nowRemoved

def holdingsMapToMatrix(holdingsMap, minInvestorsPerStock=None, maxInvestorsPerStock=None,
                        minAllInvestorsPerStock=None, maxAllInvestorsPerStock=None, allCusipCounter=None,
                        cusipFilter=None, dtype=np.float64) :
    """
    Converts a holdings map: cik -> {cusip -> frac} into a matrix.

    Returns mat, ciks, cusips where mat is a matrix of shape (len(ciks), len(cusips))
    in which each row has the fractions held by the corresponding cik in each cusip.
    The returned ciks are zero-padded to 10 characters and sorted; cusips are sorted.

    If minInvestorsPerStock is specified, restricts to stocks with at least that many investors
    in holdingsMap; likewise, maxInvestorsPerStock can be used to give an upper bound.

    If minAllInvestorsPerStock or maxAllInvestorsPerStock is specified, then allCusipCounter
    should be a Counter counting all investors that have any position in each stock,
    and the result will be restricted based on this count.

    If cusipFilter is specified, this should be a function that returns True for cusips to keep.

    Investors left with no positions after stocks are removed are dropped from the result.
    """
    # count the investors holding each stock in holdingsMap:
    cusipCounter = collections.Counter()
    for posMap in holdingsMap.values() :
        cusipCounter.update(posMap.keys())
    # collect the stocks to drop; delCount tracks the size of cusipsToRemove so far,
    # so that each message can report how many stocks its restriction added:
    cusipsToRemove = set()
    delCount = 0
    if (minInvestorsPerStock is None and maxInvestorsPerStock is None
            and minAllInvestorsPerStock is None and maxAllInvestorsPerStock is None) :
        print('not limiting number of investors per stock')
    else :
        if minAllInvestorsPerStock is not None :
            cusipsToRemove.update(cusip for cusip in cusipCounter
                                  if allCusipCounter[cusip] < minAllInvestorsPerStock)
            delCount = printRemoveStocksMessage(cusipsToRemove,delCount,
                        f'requiring at least {minAllInvestorsPerStock} ALL investors per stock')
        if maxAllInvestorsPerStock is not None :
            cusipsToRemove.update(cusip for cusip in cusipCounter
                                  if allCusipCounter[cusip] > maxAllInvestorsPerStock)
            delCount = printRemoveStocksMessage(cusipsToRemove,delCount,
                        f'requiring at most {maxAllInvestorsPerStock} ALL investors per stock')
        if minInvestorsPerStock is not None :
            cusipsToRemove.update(cusip for cusip,count in cusipCounter.items()
                                  if count < minInvestorsPerStock)
            delCount = printRemoveStocksMessage(cusipsToRemove,delCount,
                        f'requiring at least {minInvestorsPerStock} investors per stock')
        if maxInvestorsPerStock is not None :
            cusipsToRemove.update(cusip for cusip,count in cusipCounter.items()
                                  if count > maxInvestorsPerStock)
            delCount = printRemoveStocksMessage(cusipsToRemove,delCount,
                        f'requiring at most {maxInvestorsPerStock} investors per stock')
    if cusipFilter is not None :
        cusipsToRemove.update(cusip for cusip in cusipCounter
                              if not cusipFilter(cusip))
        delCount = printRemoveStocksMessage(cusipsToRemove,delCount,'applying CUSIP filter')
    if delCount > 0 :
        print('removed a total of',delCount,'stocks')
    cusips = sorted(set(cusipCounter.keys()) - cusipsToRemove)
    # keep only the investors that still have at least one position:
    ciks = sorted(cik.zfill(10) for cik,posMap in holdingsMap.items()
                  if any((cusip not in cusipsToRemove) for cusip in posMap))
    if len(ciks) < len(holdingsMap) :
        print('removed',len(holdingsMap)-len(ciks),'investors with no remaining positions')
    print(f'final counts: {len(ciks):,} investors; {len(cusips):,} stocks;',end=' ')
    cikToRow = indexMap(ciks)
    cusipToCol = indexMap(cusips)
    mat = np.zeros((len(ciks), len(cusips)), dtype=dtype)
    count = 0
    for cik,posMap in holdingsMap.items() :
        cikRow = cikToRow.get(cik.zfill(10))
        if cikRow is None :
            # investor was dropped above (no remaining positions). This must test the
            # looked-up row, not the cikToRow dict: indexing mat with a None row would
            # be interpreted by numpy as a new axis instead of raising an error.
            continue
        for cusip,frac in posMap.items() :
            if cusip not in cusipsToRemove :
                mat[cikRow, cusipToCol[cusip]] = frac
                count += 1
    print(f'{count:,} positions')
    return mat, ciks, cusips

qStartEnds = ['0101','0401','0701','1001','0101']
qPeriods = ['-03-31','-06-30','-09-30','-12-31']
def getPeriodAndNextQStartEnd(y, qNo) :
    """
    Returns a pair (period, {'startD': ..., 'endD': ...}) for a given year and
    quarter number (quarters are numbered 1-4):
    - period is the 13F period date for the quarter, i.e. its last day, as 'YYYY-MM-DD'
    - startD and endD are the 'YYYYMMDD' dateStrs of the first day of the next quarter
      and of the first day of the quarter after that (this is the date range when
      the 13Fs for this quarter should be filed).
    """
    period = str(y) + qPeriods[qNo-1]
    # step to the following quarter, rolling over into the next year after Q4
    if qNo == 4 :
        nextY, nextQNo = y+1, 1
    else :
        nextY, nextQNo = y, qNo+1
    # the end date falls in the year after nextY when the following quarter is Q4
    endY = nextY+1 if nextQNo==4 else nextY
    startEnd = {'startD' : str(nextY) + qStartEnds[nextQNo-1],
                'endD' : str(endY) + qStartEnds[nextQNo]}
    return (period, startEnd)

def getNSSForQ(y, qNo, minFrac=0.01, maxFrac=1.0, minStocksPerInv=3, maxStocksPerInv=100,
               minTop10Frac=0.4, minAUM=None, dtype=np.float64,
               minInvestorsPerStock=2, maxInvestorsPerStock=None,
               minAllInvestorsPerStock=None, maxAllInvestorsPerStock=None, allCusipCounter=None,
               cusipFilter=None, extraHoldingsMaps=[], include13F=True) :
    """
    Builds the matrix of investor holdings for a quarter from all the 13F filings
    filed during the succeeding quarter.

    Returns mat, ciks, cusips where mat is a matrix of shape (len(ciks), len(cusips))
    in which each row has the fractions held by the corresponding cik in each cusip.

    Per-position restrictions: minFrac and/or maxFrac keep only stocks with fraction
    of total portfolio >=minFrac and/or <=maxFrac.

    Per-investor restrictions: minStocksPerInv, maxStocksPerInv, minTop10Frac and minAUM,
    when specified, omit investors with too few stocks, too many stocks, too small a
    fraction in the top 10 holdings, or too small a total stock value.

    Per-stock restrictions: minInvestorsPerStock and maxInvestorsPerStock bound the number
    of investors per stock in the returned matrix. minAllInvestorsPerStock and
    maxAllInvestorsPerStock bound the count of all investors that have any position in
    the stock, as counted in allCusipCounter (a Counter; a new one is created if needed
    and none is supplied). cusipFilter, if specified, should be a function that
    returns True for cusips to keep.

    Holdings from a list of extraHoldingsMaps (used for 13G/13D filings) are added
    if given; if include13F is False these are the only holdings used.
    """
    needAllCounter = (minAllInvestorsPerStock is not None
                      or maxAllInvestorsPerStock is not None)
    if needAllCounter and allCusipCounter is None :
        allCusipCounter = collections.Counter()
    holdingsMap = {}
    if include13F :
        period, nextQStartEnd = getPeriodAndNextQStartEnd(y,qNo)
        scraped13F = scraper13F(**nextQStartEnd)
        holdingsMap = getHoldingsMap(scraped13F, period,
                                     minFrac=minFrac, maxFrac=maxFrac,
                                     minStocksPerInv=minStocksPerInv,
                                     maxStocksPerInv=maxStocksPerInv,
                                     minTop10Frac=minTop10Frac, minAUM=minAUM,
                                     allCusipCounter=allCusipCounter)
    for extraHoldingsMap in extraHoldingsMaps :
        addHoldingsMap(holdingsMap,extraHoldingsMap)
    matrixOptions = dict(minInvestorsPerStock=minInvestorsPerStock,
                         maxInvestorsPerStock=maxInvestorsPerStock,
                         minAllInvestorsPerStock=minAllInvestorsPerStock,
                         maxAllInvestorsPerStock=maxAllInvestorsPerStock,
                         allCusipCounter=allCusipCounter,
                         cusipFilter=cusipFilter, dtype=dtype)
    return holdingsMapToMatrix(holdingsMap, **matrixOptions)

def saveConvMatrixPy2(y, qNo, minFrac=0.13, maxFrac=0.4, minStocksPerInv=3, maxStocksPerInv=500,
                      minTop10Frac=None, minAUM=7.5e7, dtype=np.float64,
                      minInvestorsPerStock=2, maxInvestorsPerStock=None) :
    """
    Saves a matrix of 13F conviction positions only for the given quarter,
    in a format readable by the BW old Python2 version.

    The pickled object is a tuple:
        (rows, ciks, cikIndexMap, cusips, cusipIndexMap)
    in which every string is ASCII-encoded bytes, and rows holds the matrix
    elements as strings ('0' for zero elements). The file is written to
    Conv<y>Q<qNo>.pkl under utils.stockDataRoot using pickle protocol 2.
    """
    mat, ciks, cusips = getNSSForQ(y, qNo, minFrac=minFrac, maxFrac=maxFrac,
                                   minStocksPerInv=minStocksPerInv,
                                   maxStocksPerInv=maxStocksPerInv,
                                   minTop10Frac=minTop10Frac, minAUM=minAUM, dtype=dtype,
                                   minInvestorsPerStock=minInvestorsPerStock,
                                   maxInvestorsPerStock=maxInvestorsPerStock)
    cikBytes = [cik.encode(encoding='ascii',errors='ignore') for cik in ciks]
    cusipBytes = [cusip.encode(encoding='ascii',errors='ignore') for cusip in cusips]
    # matrix elements are stored as ASCII strings, with a plain '0' for empty positions
    matRows = []
    for row in mat :
        rowOut = []
        for el in row :
            elStr = '0' if el==0.0 else str(el)
            rowOut.append(elStr.encode(encoding='ascii'))
        matRows.append(rowOut)
    m = (matRows, cikBytes, indexMap(cikBytes), cusipBytes, indexMap(cusipBytes))
    fPath = os.path.join(utils.stockDataRoot,f'Conv{y}Q{qNo}.pkl')
    print('saving to',fPath)
    utils.pickSave(fPath, m, fix_imports=True, protocol=2)

In [None]:
# hm = getHoldingsMap(scraper13F(startD='20210401',endD='20210630'),'2021-03-31')

min stock fraction of portfolio 0.0
max stock fraction of portfolio 1.0
period 2021-03-31 - total of 5911 ciks, 6038 13F filings
CIK 1731061 1 - 2 [('20210408', '0001104659-21-047884', 829), ('20210506', '0001104659-21-062185', 895)]
CIK 1386929 1 - 2 [('20210412', '0001386929-21-000005', 79), ('20210414', '0001386929-21-000006', 79)]
CIK 1105410 1 - 2 [('20210413', '0001214659-21-004137', 50), ('20210421', '0001214659-21-004362', 50)]
CIK 1566601 1 - 2 [('20210413', '0001566601-21-000005', 764), ('20210414', '0001566601-21-000006', 764)]
CIK 1573767 1 - 2 [('20210413', '0001573767-21-000006', 49), ('20210505', '0001573767-21-000007', 66)]
CIK 1840740 1 - 2 [('20210413', '0001840740-21-000005', 51), ('20210414', '0001840740-21-000006', 51)]
CIK 1053994 1 - 2 [('20210414', '0001085146-21-001185', 184), ('20210419', '0001085146-21-001232', 296)]
CIK 1381055 2 - 3 [('20210416', '0001381055-21-000002', 101), ('20210505', '0001381055-21-000003', 101), ('20210506', '0001381055-21-000004', 10

CIK 1846177 1 - 2 [('20210514', '0001725547-21-000052', 190), ('20210517', '0001725547-21-000059', 207)]
CIK 1859606 0 - 2 [('20210514', '0001859606-21-000004', 527), ('20210520', '0001859606-21-000008', 209)]
CIK 1063296 1 - 2 [('20210517', '0000905718-21-000694', 10), ('20210518', '0000905718-21-000710', 10)]
CIK 1080383 1 - 2 [('20210517', '0000919574-21-003508', 53), ('20210518', '0000919574-21-003778', 55)]
CIK 1086619 1 - 2 [('20210517', '0001567619-21-010327', 3238), ('20210528', '0001567619-21-011331', 3238)]
CIK 1119376 1 - 2 [('20210517', '0001567619-21-010351', 38), ('20210518', '0001567619-21-010595', 40)]
CIK 1164688 1 - 2 [('20210517', '0001172661-21-001214', 16), ('20210521', '0001172661-21-001373', 16)]
CIK 1224962 1 - 2 [('20210517', '0001012975-21-000229', 138), ('20210610', '0001012975-21-000242', 139)]
CIK 1332784 1 - 2 [('20210517', '0001567619-21-010483', 12), ('20210623', '0001567619-21-012524', 12)]
CIK 1387369 1 - 2 [('20210517', '0000919574-21-003641', 39), ('

In [None]:
# hm['1452208']

{'254687106': 0.07866772348138226,
 '469814107': 0.07127893267463532,
 'D94523145': 0.06528993263237423,
 '037833100': 0.06474830071610985,
 '09253U108': 0.05895508163721184,
 '025816109': 0.0580184379094043,
 '911312106': 0.05726032572087499,
 '38141G104': 0.056938623961692476,
 '58933Y105': 0.04326845537444619,
 'G7945M107': 0.04122957344761652,
 '075887109': 0.04112176454708619,
 'H1467J104': 0.036987940065151076,
 '478160104': 0.036776634620111624,
 '097023105': 0.03520262467236875,
 '219350105': 0.03264022272456378,
 '126650100': 0.03249015273502556,
 '35137L204': 0.024812434074857326,
 '053807103': 0.02342989273445633,
 '75513E101': 0.019414226807502464,
 '278642103': 0.013259632294026782,
 '37045V100': 0.01156487637768994,
 '064058100': 0.010358279162954447,
 '78462F103': 0.008409094241366016,
 '74731Q103': 0.006565130806695191,
 '372460105': 0.006504757822398205,
 '902973304': 0.006427135414016364,
 'G491BT108': 0.005634524377317352,
 'E8T87A100': 0.005067018324925676,
 '458140

Test generating a combined holdings matrix:

In [None]:
# Check the period / filing date range calculation, including the year rollover after Q4.
assert getPeriodAndNextQStartEnd(2020,1)==('2020-03-31', {'startD': '20200401', 'endD': '20200701'}),"13F qstart/end"
assert getPeriodAndNextQStartEnd(2020,4)==('2020-12-31', {'startD': '20210101', 'endD': '20210401'}),"13F qstart/end"

# Build the holdings matrix from the scraper s created in the earlier scraper test cell,
# and spot-check the row/column labels, the shape, and two of the matrix entries.
m, ciks, cusips = holdingsMapToMatrix(getHoldingsMap(s,'2021-03-31'))
assert (ciks==['0001325083', '0001867040']
        and cusips[:2]==['00289Y107','003279730'] and cusips[-2:]==['G11196105','M40527109']
        and m.shape==(2,42) and abs(m[0,0]-0.03502463)<1e-6 and abs(m[1,13]-0.9675410812420477)<1e-6
       ), "combined holdings matrix"

min stock fraction of portfolio 0.0
max stock fraction of portfolio 1.0
period 2021-03-31 - total of 2 ciks, 2 13F filings
not limiting number of investors per stock
final counts: 2 investors; 42 stocks; 42 positions


In [None]:
# mat = saveConvMatrixPy2(2021, 2, minAUM=1e8)

min stock fraction of portfolio 0.13
max stock fraction of portfolio 0.4
min stocks per investor 3
max stocks per investor 500
min AUM (total portfolio value) 100000000.0
period 2021-06-30 - total of 5922 ciks, 6057 13F filings
CIK 1755670 1 - 2 [('20210708', '0001755670-21-000004', 396), ('20210804', '0001755670-21-000005', 509)]
CIK 1842560 1 - 2 [('20210709', '0001842560-21-000003', 332), ('20210709', '0001842560-21-000004', 332)]
CIK 1353395 1 - 2 [('20210712', '0001085146-21-001901', 260), ('20210713', '0001085146-21-001945', 154)]
CIK 1092903 1 - 2 [('20210713', '0001096906-21-001592', 708), ('20210805', '0001096906-21-001852', 708)]
CIK 1840740 1 - 2 [('20210720', '0001840740-21-000007', 52), ('20210720', '0001840740-21-000008', 52)]
CIK 1609674 1 - 2 [('20210721', '0001609674-21-000004', 175), ('20210816', '0001609674-21-000005', 176)]
CIK 1666786 1 - 2 [('20210721', '0001606587-21-000807', 188), ('20210721', '0001606587-21-000811', 188)]
CIK 1767080 0 - 2 [('20210721', '000176

CIK 1844873 2 - 3 [('20210813', '0001844873-21-000004', 81), ('20210816', '0001844873-21-000006', 81), ('20210817', '0001844873-21-000007', 81)]
CIK 1869028 1 - 2 [('20210813', '0001398344-21-016180', 2), ('20210823', '0001398344-21-016691', 2)]
CIK 898427 1 - 2 [('20210813', '0000898427-21-000016', 2347), ('20210813', '0000898427-21-000017', 2351)]
CIK 1084580 1 - 2 [('20210816', '0001085146-21-002551', 1648), ('20210820', '0001085146-21-002619', 1626)]
CIK 1144208 1 - 2 [('20210816', '0001085146-21-002607', 686), ('20210818', '0001085146-21-002615', 685)]
CIK 1212897 0 - 2 [('20210816', '0001315863-21-000701', 18), ('20210817', '0001315863-21-000708', 1)]
CIK 1224962 1 - 2 [('20210816', '0001012975-21-000320', 149), ('20210817', '0001012975-21-000326', 150)]
CIK 1301050 1 - 2 [('20210816', '0001567619-21-015630', 49), ('20210816', '0001567619-21-015814', 57)]
CIK 1393818 1 - 2 [('20210816', '0000950123-21-011566', 623), ('20210818', '0000950123-21-011650', 624)]
CIK 1418814 0 - 2 [('

In [None]:
# cusipNames = utils.pickLoad(os.path.join(utils.stockDataRoot,'cusipMap.pkl'))
# print(len(cusips),'CUSIPs')
# cn = [cusipNames.get(cusip,'CUSIP-'+cusip) for cusip in cusips]
# fundPats = [re.compile(patStr,re.IGNORECASE)
#            for patStr in [r'\b'+fundStr+r'\b' for fundStr in ['etf','ishares','spdr']]
#                         + [r'^'+fundStr for fundStr in ['janus','pimco','schwab','vanguard','berkshire']]]
# len([c for c in cn if any(fundPat.search(c) for fundPat in fundPats)])

117

In [None]:
# mat,ciks,cusips = getNSSForQ(2021, 1, minTop10Frac=0.3)

min stock fraction of portfolio 0.01
max stock fraction of portfolio 1.0
min stocks per investor 3
max stocks per investor 100
min fraction of portfolio in top 10 positions 0.3
period 2021-03-31 - total of 5911 ciks, 6039 13F filings
CIK 1731061 1 - 2 [('20210408', '0001104659-21-047884', 829), ('20210506', '0001104659-21-062185', 895)]
CIK 1386929 1 - 2 [('20210412', '0001386929-21-000005', 79), ('20210414', '0001386929-21-000006', 79)]
CIK 1105410 1 - 2 [('20210413', '0001214659-21-004137', 50), ('20210421', '0001214659-21-004362', 50)]
CIK 1566601 1 - 2 [('20210413', '0001566601-21-000005', 764), ('20210414', '0001566601-21-000006', 764)]
CIK 1573767 1 - 2 [('20210413', '0001573767-21-000006', 49), ('20210505', '0001573767-21-000007', 66)]
CIK 1840740 1 - 2 [('20210413', '0001840740-21-000005', 51), ('20210414', '0001840740-21-000006', 51)]
CIK 1053994 1 - 2 [('20210414', '0001085146-21-001185', 184), ('20210419', '0001085146-21-001232', 296)]
CIK 1381055 2 - 3 [('20210416', '000138

CIK 1063296 1 - 2 [('20210517', '0000905718-21-000694', 10), ('20210518', '0000905718-21-000710', 10)]
CIK 1080383 1 - 2 [('20210517', '0000919574-21-003508', 53), ('20210518', '0000919574-21-003778', 55)]
CIK 1086619 1 - 2 [('20210517', '0001567619-21-010327', 3238), ('20210528', '0001567619-21-011331', 3238)]
CIK 1119376 1 - 2 [('20210517', '0001567619-21-010351', 38), ('20210518', '0001567619-21-010595', 40)]
CIK 1164688 1 - 2 [('20210517', '0001172661-21-001214', 16), ('20210521', '0001172661-21-001373', 16)]
CIK 1224962 1 - 2 [('20210517', '0001012975-21-000229', 138), ('20210610', '0001012975-21-000242', 139)]
CIK 1332784 1 - 2 [('20210517', '0001567619-21-010483', 12), ('20210623', '0001567619-21-012524', 12)]
CIK 1387369 1 - 2 [('20210517', '0000919574-21-003641', 39), ('20210517', '0000919574-21-003738', 40)]
CIK 1393818 1 - 2 [('20210517', '0000950123-21-007113', 500), ('20210519', '0000950123-21-007183', 500)]
CIK 1423053 1 - 3 [('20210517', '0000950123-21-007021', 16138), (

In [None]:
#hide
# uncomment and run to regenerate all library Python files
# from nbdev.export import notebook2script; notebook2script()