In [None]:
# default_exp cikFInfo

# cikFInfo

> Save parsed form info for each CIK in a separate JSON-format file.

In [None]:
#hide
%load_ext autoreload
%autoreload 2
from nbdev import show_doc

In [None]:
#export

import collections
import json
import os

from secscan import utils, dailyList
from secscan import scrape13F, scrape8K, scrape6K, scrape13G, scrape13D, scrape4

# Default root directory under which the per-CIK JSON files are stored.
defaultCikFInfoDir = os.path.join(utils.stockDataRoot, 'cikFInfo')

# Scraper classes processed by saveYears.
allScraperClasses = [
    scrape13F.scraper13F,
    scrape8K.scraper8K,
    scrape6K.scraper6K,
    scrape13G.scraper13G,
    scrape13D.scraper13D,
    scrape4.scraper4,
]

# Number of leading CIK characters used by getCikFInfoDirAndPath
# to name the subdirectory that holds a CIK's file.
cikFPrefLen = 4

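Save parsed form info for each CIK in a separate JSON-format file.
The format is designed so that info for additional forms can simply be appended to the file:
each save writes the body of a JSON object (without the enclosing braces) followed by a comma and a newline,
and loading drops the final comma and wraps the text in braces before parsing it.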

In [None]:
#export

def getCikFInfoDirAndPath(cik, cikFInfoDir=defaultCikFInfoDir) :
    """
    Return (directory, filePath) for the JSON file holding info for cik.

    cik must be a string of at least two digits with no leading zero;
    otherwise ValueError is raised. The directory is cikFInfoDir joined with
    the first cikFPrefLen characters of cik, and the file is named cik+'.json'.
    """
    cikLooksValid = len(cik)>=2 and cik.isdigit() and cik[0]!='0'
    if not cikLooksValid :
        raise ValueError(f'invalid CIK "{cik}"')
    prefixDir = os.path.join(cikFInfoDir, cik[:cikFPrefLen])
    jsonPath = os.path.join(prefixDir, cik+'.json')
    return prefixDir, jsonPath

def jsonValError(msg, s) :
    """
    Build (but do not raise) a ValueError whose message is msg + ' in ' + s.

    If s is longer than 200 characters, only its first and last 100
    characters are shown, joined by ' ... '.
    """
    maxShown, keepEachEnd = 200, 100
    shown = s if len(s) <= maxShown else s[:keepEachEnd] + ' ... ' + s[-keepEachEnd:]
    return ValueError(msg + ' in ' + shown)

def loadCikFInfo(cik, cikFInfoDir=defaultCikFInfoDir, returnAsText=False) :
    """
    Load the saved form info for one CIK.

    cik may be an int or a string; it is converted to a string and leading
    zeros are stripped before the file path is computed.

    Returns {} if there is no file for the CIK, or if the file is empty or
    contains only whitespace (note that {} is returned in these cases even
    when returnAsText is True). Otherwise returns the stripped file text
    without its final comma if returnAsText is True, or else the dict obtained
    by wrapping that text in braces and parsing it as JSON.

    Raises ValueError if the CIK is invalid, if the file text does not end
    with a comma, or (from json.loads) if the wrapped text is not valid JSON.
    """
    cik = str(cik).lstrip('0')
    fPath = getCikFInfoDirAndPath(cik, cikFInfoDir)[1]
    if not os.path.exists(fPath) :
        return {}
    with open(fPath,'r',encoding='ascii') as f :
        s = f.read().strip()
    if not s :
        # A file with no content holds no records: treat it like a missing file
        # rather than failing with IndexError on s[-1] below.
        return {}
    if s[-1] != ',' :
        raise jsonValError('missing ending ,', s)
    if returnAsText :
        return s[:-1]
    # The file holds the inside of a JSON object followed by a trailing comma,
    # so drop the comma and add the enclosing braces before parsing.
    return json.loads('{'+s[:-1]+'}')

def saveCikFInfo(cik, cikFInfo, removeDups=False, cikFInfoDir=defaultCikFInfoDir) :
    """
    Append the entries of the dict cikFInfo to the JSON-format file for cik.

    cik may be an int or a string; it is converted to a string and leading
    zeros are stripped, matching what loadCikFInfo does, so both functions
    accept the same CIK spellings and address the same file.

    If removeDups is True, entries whose keys are already present in the
    existing file are dropped first. Nothing is written (and no directory is
    created) if there are no entries left to save.

    Raises ValueError if the CIK is invalid or if the serialized JSON is not
    enclosed in braces.
    """
    cik = str(cik).lstrip('0')
    if removeDups :
        existingCikFInfo = loadCikFInfo(cik, cikFInfoDir=cikFInfoDir)
        cikFInfo = dict((k,v) for k,v in cikFInfo.items() if k not in existingCikFInfo)
    if len(cikFInfo) == 0 :
        return
    s = json.dumps(cikFInfo, indent=0).strip()
    if s[0]!='{' or s[-1]!='}' :
        raise jsonValError('missing start/end {}', s)
    fDir, fPath = getCikFInfoDirAndPath(cik, cikFInfoDir)
    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(fDir, exist_ok=True)
    with open(fPath,'a',encoding='ascii') as f :
        # Write the object body without its braces plus the ',' terminator that
        # loadCikFInfo expects, in a single call so the two stay together.
        f.write(s[1:-1] + ',\n')

def saveAllCikFInfo(startD, endD, scraperClasses,
                    removeDups=True, cikFInfoDir=defaultCikFInfoDir, ciks=None) :
    """
    Collect form info from each of scraperClasses for the startD, endD date range
    and append it to the per-CIK files under cikFInfoDir.

    Before the info is collected it is adjusted based on the scraper's formClass:
    - 'SC 13...' : a 'cikNames' list is added alongside any 'ciks' list;
    - 'INSIDER...' : an 'issuerName' is added if a CIK of type 'Issuer' is found;
    - '13F...' : 'holdings' is replaced by the result of
      scrape13F.get13FHoldingsReportList with minFrac=0.01.

    Dates already recorded in dates.pkl in cikFInfoDir are passed to each
    scraper's addToCikInfoMap as excludeDates, and after saving, all dates
    in the daily list are added to dates.pkl.
    If ciks is not None, only CIKs contained in it are saved.

    NOTE(review): dates.pkl is updated even when ciks or scraperClasses restrict
    what is saved, so those dates look complete to later runs - confirm intended.
    """
    dl = dailyList.dailyList(startD=startD, endD=endD)
    datesPresent = utils.loadPklFromDir(cikFInfoDir, "dates.pkl", set())

    def nameForCik(paddedCik) :
        # name from the daily list, falling back to 'CIK' + the unpadded number
        bareCik = paddedCik.lstrip('0')
        return dl.cikNames.get(bareCik, ('CIK'+bareCik,))[0]

    def allInfos(scr) :
        # every info dict in the scraper's infoMap, over all dates
        for infosForDate in scr.infoMap.values() :
            yield from infosForDate.values()

    cikInfoMap = {}
    for scraperClass in scraperClasses :
        scraper = scraperClass(startD=startD, endD=endD)
        formClass = scraper.formClass
        if formClass.startswith('SC 13') :
            # fill in cik names
            for info in allInfos(scraper) :
                if 'ciks' in info :
                    info['cikNames'] = [nameForCik(cik) for cik in info['ciks']]
        elif formClass.startswith('INSIDER') :
            # fill in the name of the first CIK whose type is 'Issuer'
            for info in allInfos(scraper) :
                issuerCik = next((cik for cik,cikType in zip(info['ciks'],info['cikTypes'])
                                  if cikType == 'Issuer'),
                                 None)
                if issuerCik is not None :
                    info['issuerName'] = nameForCik(issuerCik)
        elif formClass.startswith('13F') :
            # replace the holdings with the report list form
            for info in allInfos(scraper) :
                info['holdings'] = scrape13F.get13FHoldingsReportList(info['holdings'],
                                                                      minFrac=0.01)
        scraper.addToCikInfoMap(dl, cikInfoMap, ciks=ciks, excludeDates=datesPresent)
    for cik,cikFInfo in cikInfoMap.items() :
        if ciks is None or cik in ciks :
            saveCikFInfo(cik, cikFInfo, removeDups=removeDups, cikFInfoDir=cikFInfoDir)
    datesPresent.update(dl.dl.keys())
    utils.savePklToDir(cikFInfoDir, "dates.pkl", datesPresent)

def prSample(s, n=2, filterF=lambda x : True) :
    """
    Print and return up to n entries of s.infoMap for which filterF(info) is true.

    s.infoMap is iterated as a dict of dicts; for each matching entry the
    outer key, inner key and info are printed.

    Returns the single matching info if n==1, otherwise a list of the matching
    infos. If fewer than n entries match, the ones that were found are still
    returned as a list; None is returned only if nothing matched.
    """
    res = []
    for dStr,dInfo in s.infoMap.items() :
        for accNo,info in dInfo.items() :
            if filterF(info) :
                print(dStr,accNo,info)
                res.append(info)
                if len(res) >= n :
                    return (res[0] if n==1 else res)
    # Fewer than n matches: return what was collected instead of silently
    # discarding it by falling off the end of the function.
    return res if res else None

def saveYears(startY, endY,
              removeDups=False, cikFInfoDir=defaultCikFInfoDir, ciks=None) :
    """
    Run saveAllCikFInfo with allScraperClasses one quarter at a time
    for each year from startY up to but not including endY.

    Each call gets the first day of a quarter as startD and the first day
    of the following quarter as endD (January 1 of the next year for the
    last quarter); both dates are printed before the call.
    """
    quarterStarts = ('0101', '0401', '0701', '1001')
    for year in range(startY, endY) :
        for qNo, qStart in enumerate(quarterStarts) :
            startD = f'{year}{qStart}'
            if qNo+1 < len(quarterStarts) :
                endD = f'{year}{quarterStarts[qNo+1]}'
            else :
                # the last quarter ends at the start of the next year
                endD = f'{year+1}{quarterStarts[0]}'
            print(startD, endD)
            saveAllCikFInfo(startD, endD, allScraperClasses,
                            removeDups=removeDups, cikFInfoDir=cikFInfoDir, ciks=ciks)

In [None]:
# scrape13F.get13FHoldingsReportList(r['holdings'], minFrac=0.01)

[('037833100', 'APPLE', 55525.0, '8.05%'),
 ('594918104', 'MICROSOFT', 45388.0, '6.58%'),
 ('742718109', 'PROCTER & GAMBLE', 27049.0, '3.92%'),
 ('00287Y109', 'ABBVIE', 25502.0, '3.70%'),
 ('30231G102', 'EXXON MOBIL', 25476.0, '3.69%'),
 ('478160104', 'JOHNSON & JOHNSON', 24742.0, '3.59%'),
 ('11135F101', 'BROADCOM', 23882.0, '3.46%'),
 ('256677105', 'DOLLAR GENERAL', 22033.0, '3.19%'),
 ('437076102', 'HOME DEPOT', 18729.0, '2.71%'),
 ('17275R102', 'CISCO SYSTEMS', 18188.0, '2.64%'),
 ('617446448', 'MORGAN STANLEY', 17132.0, '2.48%'),
 ('97717X669', 'WISDOMTREE US DIV GROWTH ETF', 16214.0, '2.35%'),
 ('031162100', 'AMGEN', 15452.0, '2.24%'),
 ('666807102', 'NORTHROP GRUMMAN', 14728.0, '2.13%'),
 ('532457108', 'ELI LILLY', 13645.0, '1.98%'),
 ('427866108', 'HERSHEY', 13179.0, '1.91%'),
 ('451107106', 'IDACORP', 12180.0, '1.76%'),
 ('02079K305', 'ALPHABET CL A', 11836.0, '1.72%'),
 ('717081103', 'PFIZER', 11791.0, '1.71%'),
 ('03027X100', 'AMERICAN TOWER', 11189.0, '1.62%'),
 ('253868103

Code to check CIK format and figure out the right prefix length:

In [None]:
# def checkCiks() :
#     cikNames = utils.pickLoad(os.path.join(utils.stockDataRoot,'dlMaps','cikNames.pkl'))
#     print('ciks with leading 0', [cik for cik in cikNames if cik[0]=='0'])
#     print('less than 4 long', [cik for cik in cikNames if len(cik)<4])
#     for prefLen in [3,4] :
#         ciksByPref = collections.defaultdict(list)
#         for cik in cikNames :
#             ciksByPref[cik[:prefLen]].append(cik)
#         print(f'prefix length {prefLen}: {len(ciksByPref)} folders,'
#               +f' max {max(len(v) for v in ciksByPref.values())} files')
# checkCiks()
# OUTPUT: 
# ciks with leading 0 []
# less than 4 long ['63']
# prefix length 3: 773 folders, max 9154 files
# prefix length 4: 4003 folders, max 933 files
# - chose prefix length 4

In [None]:
# l = loadCikFInfo(73290)
# l['0001513162-21-000147'] # 4
# l['0001398344-22-003355'] # 13G

{'links': [['primary_doc.html',
   'PRIMARY DOCUMENT',
   '4',
   '/Archives/edgar/data/73290/000151316221000147/xslF345X03/primary_doc.xml'],
  ['primary_doc.xml',
   'PRIMARY DOCUMENT',
   '4',
   '/Archives/edgar/data/73290/000151316221000147/primary_doc.xml']],
 'complete': '/Archives/edgar/data/73290/000151316221000147/0001513162-21-000147.txt',
 'ciks': ['0000073290', '0001271102'],
 'cikTypes': ['Issuer', 'Reporting'],
 'period': '2021-12-09',
 'acceptDate': '2021-12-10',
 'acceptTime': '17:48:30',
 'transactions': [['Non-qualified stock option',
   '2021-12-09',
   'A',
   'A',
   '45000',
   '',
   '',
   'D',
   '265000']],
 'reportingName': ['BARBIERI ALLEN'],
 'reportingCik': ['1271102']}

In [None]:
# l['0000732717-23-000007']['itemTexts']

['Item 2.02 Results of Operations and Financial Condition. The registrant announced on January 25, 2023, its results of operations for the fourth quarter of 2022. The text of the press release and accompanying financial information are attached as exhibits and incorporated herein by reference. ',
 'Item 9.01 Financial Statements and Exhibits. The following exhibits are furnished as part of this report: (d) Exhibits 99.1 Press release dated January 25, 2023 reporting financial results for the fourth quarter ended December 31, 2022. 99.2 AT&T Inc. selected financial statements and operating data. 99.3 Discussion and reconciliation of non-GAAP measures. 99.4 Supplemental Quarterly Standalone AT&T Financial Information. 104 Cover Page Interactive Data File (embedded within the Inline XBRL document) Signature Pursuant to the requirements of the Securities Exchange Act of 1934, the Registrant has duly caused this report to be signed on its behalf by the undersigned hereunto duly authorized. 

In [None]:
# dict((k,v) for k,v in l.items() if k!='filings')

{'cik': '732717',
 'entityType': 'operating',
 'sic': '4813',
 'sicDescription': 'Telephone Communications (No Radiotelephone)',
 'insiderTransactionForOwnerExists': 1,
 'insiderTransactionForIssuerExists': 1,
 'name': 'AT&T INC.',
 'tickers': ['T', 'TBB', 'TBC', 'T-PA', 'T-PC'],
 'exchanges': ['NYSE', 'NYSE', 'NYSE', 'NYSE', 'NYSE'],
 'ein': '431301883',
 'description': '',
 'website': '',
 'investorWebsite': '',
 'category': 'Large accelerated filer',
 'fiscalYearEnd': '1231',
 'stateOfIncorporation': 'DE',
 'stateOfIncorporationDescription': 'DE',
 'addresses': {'mailing': {'street1': '208 S. AKARD ST',
   'street2': None,
   'city': 'DALLAS',
   'stateOrCountry': 'TX',
   'zipCode': '75202',
   'stateOrCountryDescription': 'TX'},
  'business': {'street1': '208 S. AKARD ST',
   'street2': None,
   'city': 'DALLAS',
   'stateOrCountry': 'TX',
   'zipCode': '75202',
   'stateOrCountryDescription': 'TX'}},
 'phone': '2108214105',
 'flags': '',
 'formerNames': [{'name': 'SBC COMMUNICATI

In [None]:
# saveYears(2017,2024)

20170101 20170401
20170401 20170701
20170701 20171001
20171001 20180101
20180101 20180401
20180401 20180701
20180701 20181001
20181001 20190101
20190101 20190401
20190401 20190701
20190701 20191001
20191001 20200101
20200101 20200401
20200401 20200701
20200701 20201001
20201001 20210101
20210101 20210401
20210401 20210701
20210701 20211001
20211001 20220101
20220101 20220401
20220401 20220701
20220701 20221001
20221001 20230101
20230101 20230401
20230401 20230701
20230701 20231001
20231001 20240101


In [None]:
# ll = sorted(utils.loadPklFromDir(defaultCikFInfoDir,'dates.pkl','xxx')); ll[0],ll[-1]

('20170101', '20230203')

Test the CIK file info functions:

In [None]:
# Loading a CIK that has no saved file gives an empty dict
# (assumes no file for CIK 123 exists under the default directory).
assert loadCikFInfo('123') == {}
# The directory is named after the CIK prefix and the file after the full CIK.
testCik = '12345'
testDir, testPath = getCikFInfoDirAndPath(testCik)
assert testDir.endswith(testCik[:cikFPrefLen])
assert testPath.endswith(testCik + '.json')

In [None]:
#hide
# uncomment and run to regenerate all library Python files
# from nbdev.export import notebook2script; notebook2script()