In [None]:
# default_exp cikFInfo

# cikFInfo

> Save parsed form info for each CIK in a separate JSON-format file.

In [None]:
#hide
%load_ext autoreload
%autoreload 2
from nbdev import show_doc

In [None]:
#export

import collections
import json
import os

from secscan import utils, dailyList
from secscan import scrape13F, scrape8K, scrape6K, scrape13G, scrape13D, scrape4

# Root directory for the per-CIK JSON-format files.
defaultCikFInfoDir = os.path.join(utils.stockDataRoot, 'cikFInfo')

# All the scraper classes available to saveAllCikFInfo.
allScraperClasses = [
    scrape13F.scraper13F,
    scrape8K.scraper8K,
    scrape6K.scraper6K,
    scrape13G.scraper13G,
    scrape13D.scraper13D,
    scrape4.scraper4,
]

# Number of leading CIK characters used to name the subdirectory holding a CIK's file.
cikFPrefLen = 4

Save parsed form info for each CIK in a separate JSON-format file.
Each file holds the members of a single JSON object without the enclosing braces, and every saved chunk ends with a comma, so info for additional forms can simply be appended to the file.

In [None]:
#export

def getCikFInfoDirAndPath(cik, cikFInfoDir=defaultCikFInfoDir) :
    """
    Return the (directory, file path) pair for the JSON-format file of a CIK.

    The file is named cik+'.json' and lives in a subdirectory of cikFInfoDir
    named after the first cikFPrefLen characters of the CIK.

    Raises ValueError unless cik is at least 2 characters long, all digits,
    and has no leading 0.
    """
    cikIsValid = len(cik)>=2 and cik.isdigit() and cik[0]!='0'
    if not cikIsValid :
        raise ValueError(f'invalid CIK "{cik}"')
    prefDir = os.path.join(cikFInfoDir, cik[:cikFPrefLen])
    jsonName = cik + '.json'
    return prefDir, os.path.join(prefDir, jsonName)

def jsonValError(msg, s) :
    """
    Build (but don't raise) a ValueError whose message is msg + ' in ' + s.

    If s is longer than 200 characters, only its first and last 100 characters
    are shown, separated by ' ... '.
    """
    shown = s
    if len(shown) > 200 :
        head, tail = shown[:100], shown[-100:]
        shown = ' ... '.join((head, tail))
    return ValueError(' in '.join((msg, shown)))

def loadCikFInfo(cik, cikFInfoDir=defaultCikFInfoDir, returnAsText=False) :
    """
    Load the saved form info for a CIK.

    The file is expected to hold the members of one JSON object without the
    enclosing braces, ending with a comma (the layout written by saveCikFInfo).

    Arguments:
      cik - the CIK as a string, validated by getCikFInfoDirAndPath
      cikFInfoDir - root directory of the per-CIK files
      returnAsText - if true, return the file text minus the final comma
          instead of parsing it

    Returns the parsed dict, or a str if returnAsText is true.
    A missing or empty file gives {} (or '' if returnAsText is true).

    Raises ValueError if the CIK is invalid, if a non-empty file doesn't end
    with a comma, or if the text can't be parsed as JSON.
    """
    fPath = getCikFInfoDirAndPath(cik, cikFInfoDir)[1]
    s = ''
    if os.path.exists(fPath) :
        with open(fPath,'r',encoding='ascii') as f :
            s = f.read().strip()
    if s == '' :
        # Nothing saved: keep the return type consistent with returnAsText,
        # so callers asking for text always get something they can write out.
        return '' if returnAsText else {}
    if not s.endswith(',') :
        raise jsonValError('missing ending ,', s)
    if returnAsText :
        return s[:-1]
    return json.loads('{'+s[:-1]+'}')

def saveCikFInfo(cik, cikFInfo, removeDups=False, cikFInfoDir=defaultCikFInfoDir) :
    """
    Append form info to the JSON-format file for a CIK.

    Arguments:
      cik - the CIK as a string, validated by getCikFInfoDirAndPath
      cikFInfo - dict of info to save; must be JSON-serializable
      removeDups - if true, skip any key that is already present in the file
      cikFInfoDir - root directory of the per-CIK files

    Does nothing if there is no info left to save. Otherwise the dict is
    serialized, its enclosing braces are dropped, and the result is appended
    to the file followed by a comma, creating the directory if needed.

    Raises ValueError if the CIK is invalid or the serialized text
    isn't enclosed in braces.
    """
    if removeDups :
        existingCikFInfo = loadCikFInfo(cik, cikFInfoDir=cikFInfoDir)
        cikFInfo = {k:v for k,v in cikFInfo.items() if k not in existingCikFInfo}
    if len(cikFInfo) == 0 :
        return
    s = json.dumps(cikFInfo, indent=0).strip()
    if s[0]!='{' or s[-1]!='}' :
        raise jsonValError('missing start/end {}', s)
    fDir, fPath = getCikFInfoDirAndPath(cik, cikFInfoDir)
    # exist_ok avoids failing if the directory appears between a check and the creation
    os.makedirs(fDir, exist_ok=True)
    with open(fPath,'a',encoding='ascii') as f :
        # write the chunk and its terminating comma in a single call
        f.write(s[1:-1] + ',\n')

def saveAllCikFInfo(startD, endD, scraperClasses,
                    removeDups=True, cikFInfoDir=defaultCikFInfoDir,
                    ciks=None) :
    """
    Collect form info from each scraper class for the given date range,
    then save it to the per-CIK files using saveCikFInfo.

    Arguments:
      startD, endD - date range, passed on to dailyList and to each scraper class
      scraperClasses - classes to instantiate; each instance's addToCikInfoMap
          is called to fill a shared map (presumably CIK -> info dict - verify
          against the scraper classes)
      removeDups, cikFInfoDir - passed on to saveCikFInfo
      ciks - if not None, passed on to the scrapers, and only the CIKs
          it contains are saved
    """
    infoByCik = {}
    dl = dailyList.dailyList(startD=startD, endD=endD)
    for scraperClass in scraperClasses :
        scraperClass(startD=startD, endD=endD).addToCikInfoMap(dl, infoByCik, ciks=ciks)
    for cik,cikFInfo in infoByCik.items() :
        if ciks is None or cik in ciks :
            saveCikFInfo(cik, cikFInfo, removeDups=removeDups, cikFInfoDir=cikFInfoDir)

Code to check CIK format and figure out the right prefix length:

In [None]:
# def checkCiks() :
#     cikNames = utils.pickLoad(os.path.join(utils.stockDataRoot,'dlMaps','cikNames.pkl'))
#     print('ciks with leading 0', [cik for cik in cikNames if cik[0]=='0'])
#     print('less than 4 long', [cik for cik in cikNames if len(cik)<4])
#     for prefLen in [3,4] :
#         ciksByPref = collections.defaultdict(list)
#         for cik in cikNames :
#             ciksByPref[cik[:prefLen]].append(cik)
#         print(f'prefix length {prefLen}: {len(ciksByPref)} folders,'
#               +f' max {max(len(v) for v in ciksByPref.values())} files')
# checkCiks()
# OUTPUT: 
# ciks with leading 0 []
# less than 4 long ['63']
# prefix length 3: 773 folders, max 9154 files
# prefix length 4: 4003 folders, max 933 files
# - chose prefix length 4

In [None]:
# Start of the generated HTML page: document head, CSS for the collapsible sections,
# the "expand all" button, and an opening <script> tag. makeHtml writes its
# JavaScript variable definitions right after this text.
htmlPref = """
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style>
.collapsible {
  background-color: #DDD;
  cursor: pointer;
  padding: 2px;
  margin: 2px;
  border: 1px solid black;
  text-align: left;
  outline: none;
  font-size: 15px;
}
.collapsible:hover {
  background-color: #AAA;
}
.content {
  padding: 0 2px;
  display: none;
  overflow: hidden;
  background-color: #DDD;
}
.bodyclass {
  font-size: 16px;
  font-family: Arial;
}
</style>
</head>
<body class="bodyclass">
<div><button type="button" id="expandall">expand all</button></div>
<script>
"""

# End of the generated HTML page: JavaScript that lists the accession numbers found in
# the submissions variable and wires up the collapsible sections and the "expand all"
# button, followed by the closing tags. Accession numbers are inserted with textContent
# so that downloaded data is never interpreted as HTML markup.
htmlSuff = """
var fList = submissions['filings']['recent'];
var accNos = fList['accessionNumber'];
for (let i=0; i<accNos.length; i++) {
    var el = document.createElement("div");
    el.textContent = accNos[i];
    document.body.appendChild(el);
}
var collList = document.getElementsByClassName("collapsible");
for (let i = 0; i < collList.length; i++) {
  collList[i].addEventListener("click", function() {
    let content = this.nextElementSibling;
    content.style.display = (content.style.display === "block" ? "none" : "block");
  }
  );
}
document.getElementById("expandall").addEventListener("click", function() {
    let expandingAll = (this.textContent === 'expand all');
    this.textContent = (expandingAll ? 'collapse all' : 'expand all');
    for (let i = 0; i < collList.length; i++) {
        let content = collList[i].nextElementSibling;
        content.style.display = (expandingAll ? "block" : "none");
    }
});
</script>
</body>
</html>"""

from secscan import getCikFilings

def makeHtml(cik) :
    """
    Write an HTML page for a CIK to cik+'.html' under utils.stockDataRoot.

    The page consists of htmlPref, JavaScript definitions of fInfo (the saved
    form info for the CIK) and submissions (the text downloaded from the URL
    given by getCikFilings.cikRestFilingsUrl), and then htmlSuff.

    Arguments:
      cik - the CIK as an int or string; leading zeros are dropped
    """
    cik = str(cik).lstrip('0')
    subs = utils.downloadSecUrl(getCikFilings.cikRestFilingsUrl(cik),
                             restData=True, toFormat='text')
    # When nothing is saved for this CIK the result can be an empty dict
    # rather than text; write it as empty text instead of failing in f.write.
    cikFInfo = loadCikFInfo(cik, returnAsText=True) or ''
    # Both texts end up inside a <script> element: escape '</' so that data
    # containing '</script>' can't end the script early ('<\/' means the same
    # thing inside a JSON/JavaScript string).
    cikFInfo = cikFInfo.replace('</', '<\\/')
    subs = subs.replace('</', '<\\/')
    # The page declares charset utf-8, so write it as utf-8
    # regardless of the platform default encoding.
    with open(os.path.join(utils.stockDataRoot,cik+'.html'), 'w', encoding='utf-8') as f :
        f.write(htmlPref)
        f.write('var fInfo={')
        f.write(cikFInfo)
        f.write('};\n')
        f.write('var submissions=')
        f.write(subs)
        f.write(';\n')
        f.write(htmlSuff)

In [None]:
# Generate the page for CIK 732717. Note that this calls utils.downloadSecUrl
# and writes 732717.html under utils.stockDataRoot each time the cell runs.
makeHtml(732717)

In [None]:
# dict((k,v) for k,v in l.items() if k!='filings')

{'cik': '732717',
 'entityType': 'operating',
 'sic': '4813',
 'sicDescription': 'Telephone Communications (No Radiotelephone)',
 'insiderTransactionForOwnerExists': 1,
 'insiderTransactionForIssuerExists': 1,
 'name': 'AT&T INC.',
 'tickers': ['T', 'TBB', 'TBC', 'T-PA', 'T-PC'],
 'exchanges': ['NYSE', 'NYSE', 'NYSE', 'NYSE', 'NYSE'],
 'ein': '431301883',
 'description': '',
 'website': '',
 'investorWebsite': '',
 'category': 'Large accelerated filer',
 'fiscalYearEnd': '1231',
 'stateOfIncorporation': 'DE',
 'stateOfIncorporationDescription': 'DE',
 'addresses': {'mailing': {'street1': '208 S. AKARD ST',
   'street2': None,
   'city': 'DALLAS',
   'stateOrCountry': 'TX',
   'zipCode': '75202',
   'stateOrCountryDescription': 'TX'},
  'business': {'street1': '208 S. AKARD ST',
   'street2': None,
   'city': 'DALLAS',
   'stateOrCountry': 'TX',
   'zipCode': '75202',
   'stateOrCountryDescription': 'TX'}},
 'phone': '2108214105',
 'flags': '',
 'formerNames': [{'name': 'SBC COMMUNICATI

In [None]:
# saveAllCikFInfo('20221001','2024',allScraperClasses)

In [None]:
# saveAllCikFInfo('20220701','20221001',allScraperClasses)

Test the CIK file info functions:

In [None]:
import tempfile

# Use a scratch directory so the checks neither depend on nor modify the real data files.
with tempfile.TemporaryDirectory() as tmpDir :
    # no file saved yet for this CIK
    assert loadCikFInfo('123', cikFInfoDir=tmpDir)=={}
    # round trip: the second save appends only the key that isn't already in the file
    saveCikFInfo('12345', {'a':1}, cikFInfoDir=tmpDir)
    saveCikFInfo('12345', {'a':2, 'b':3}, removeDups=True, cikFInfoDir=tmpDir)
    assert loadCikFInfo('12345', cikFInfoDir=tmpDir)=={'a':1, 'b':3}

# the directory is named after the CIK prefix and the file after the full CIK
fDir, fPath = getCikFInfoDirAndPath('12345')
assert fDir.endswith('12345'[:cikFPrefLen]) and fPath.endswith('12345.json')

In [None]:
#hide
# uncomment and run to regenerate all library Python files
# from nbdev.export import notebook2script; notebook2script()