In [7]:
'''
    Notebook used to generate 'first' and 'last' to generate 
    from the downloaded forms.

'''
import json
import os
from pathlib import Path
from prepline_sec_filings.sec_document import SECDocument, clean_sec_text
from unstructured.documents.html import HTMLListItem

from prepline_sec_filings.sections import SECSection, section_string_to_enum

DIRECTORY = os.getcwd()

RISK_FACTOR_XFAILS = ["aig", "bgs"]

with open(
    os.path.join(DIRECTORY, "fixtures", "sample-first-last.json"),
    "r",
) as f:
    sample_first_last = json.load(f)

with open(os.path.join("test_utils", "examples.json")) as f:
    examples = json.load(f)
    

def get_file_from_ticker(ticker):
    cik = examples[ticker]["cik"]
    formtype = next(iter(examples[ticker]["forms"]))
    accession_number = examples[ticker]["forms"][formtype]
    with open(
        os.path.join("sample-sec-docs", f"{ticker}-{formtype}-{cik}-{accession_number}.xbrl")
    ) as f:
        out = f.read()
    return out

In [9]:
examples

{'mmm': {'cik': '66740', 'forms': {'10-Q': '000006674022000065'}},
 'aig': {'cik': '5272', 'forms': {'10-K': '000110465922024701'}},
 'cl': {'cik': '21665', 'forms': {'10-Q': '000002166522000010'}},
 'cvs': {'cik': '64803', 'forms': {'10-K': '000006480322000008'}},
 'f': {'cik': '37996', 'forms': {'10-Q': '000003799622000024'}},
 'lmt': {'cik': '936468', 'forms': {'10-K': '000093646822000008'}},
 'mrk': {'cik': '310158', 'forms': {'10-Q': '000031015821000028'}},
 'nke': {'cik': '320187', 'forms': {'10-K': '000032018722000038'}},
 'v': {'cik': '1403161', 'forms': {'10-Q': '000140316122000027'}},
 'dis': {'cik': '1744489', 'forms': {'10-K': '000174448921000220'}},
 'brks': {'cik': '933974', 'forms': {'10-Q': '000155837021006699'}},
 'rgld': {'cik': '85535', 'forms': {'10-K': '000155837021011343'}},
 'bc': {'cik': '14930', 'forms': {'10-Q': '000001493021000103'}},
 'cri': {'cik': '1060822', 'forms': {'10-K': '000106082222000096'}},
 'doc': {'cik': '1574540', 'forms': {'10-Q': '00015745402

In [10]:
tickers_10q = [ticker for ticker in sample_first_last if '10-Q' in examples[ticker]['forms']] # filter only 10-Q docs
tickers_10q

['mmm', 'cl', 'f', 'mrk', 'v', 'brks', 'bc', 'doc', 'omcl', 'ava']

In [45]:
def get_doc_elements(tickers):
    docs_all = {}
    for ticker in tickers:
        print('at ticker', ticker)
        text = get_file_from_ticker(ticker)
        doc = SECDocument.from_string(text).doc_after_cleaners(skip_headers_and_footers=True)
        docs_all[ticker] = {}
        docs_all[ticker]["doc"] = doc
        docs_all[ticker]["elements"] = doc.elements
    return docs_all


In [47]:
docs_all = get_doc_elements(tickers_10q)
print(docs_all)

at ticker mmm
at ticker cl
at ticker f
at ticker mrk
at ticker v
at ticker brks
at ticker bc
at ticker doc
at ticker omcl
at ticker ava


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [48]:
print(docs_all.keys())

dict_keys(['mmm', 'cl', 'f', 'mrk', 'v', 'brks', 'bc', 'doc', 'omcl', 'ava'])


In [49]:
def get_doc(docs_all, ticker):
    return docs_all[ticker]["doc"], docs_all[ticker]["elements"]

In [60]:
sections = ['FINANCIAL_STATEMENTS', # ITEM 1
            'MANAGEMENT_DISCUSSION', # ITEM 2
            'MARKET_RISK_DISCLOSURES',  # ITEM 3
            'CONTROLS_AND_PROCEDURES']  # ITEM 4

def print_ticker(docs_all, ticker,sections=sections):
    doc, _ = get_doc(docs_all, ticker)
    print('### ', ticker,' ###')
    for section in sections:
        print('----', section ,'-----')
        for el in doc.get_section_narrative(section_string_to_enum[section]):
            print('+',clean_sec_text(el.text))

In [61]:
print_ticker(docs_all,'cl')

###  cl  ###
---- MANAGEMENT_DISCUSSION -----
+ Colgate Palmolive Company (together with its subsidiaries, “we,” “us,” “our,” the “Company” or “Colgate”) is a caring, innovative growth company reimagining a healthier future for all people, their pets and our planet. We seek to deliver sustainable, profitable growth and superior shareholder returns, as well as to provide Colgate people with an innovative and inclusive work environment. We do this by developing and selling products globally that make people’s and their pets’ lives healthier and more enjoyable and by embracing our sustainability and social impact and diversity, equity and inclusion (“DE&I”) strategies across our organization
+ We are tightly focused on two product segments: Oral, Personal and Home Care; and Pet Nutrition. Within these segments, we follow a closely defined business strategy to grow our key product categories and increase our overall market share. Within the categories in which we compete, we prioritize our

In [62]:
print_ticker(docs_all,'mrk')

###  mrk  ###
---- FINANCIAL_STATEMENTS -----
---- MANAGEMENT_DISCUSSION -----
+ On June 2, 2021, Merck completed the spin off of products from its women’s health, biosimilars and established brands businesses into a new, independent, publicly traded company named Organon & Co. (Organon) through a distribution of Organon’s publicly traded stock to Company shareholders. The distribution is expected to qualify as tax free to the Company and its shareholders for U.S. federal income tax purposes. The established brands included in the transaction consisted of dermatology, non opioid pain management, respiratory, select cardiovascular products, as well as the rest of Merck’s diversified brands franchise. Merck’s existing research pipeline programs will continue to be owned and developed within Merck as planned. The historical results of the women’s health, biosimilars and established brands businesses that were contributed to Organon in the spin off have been reflected as discontinued opera

+ There have been no material changes in market risk exposures that affect the disclosures presented in “Item 7A. Quantitative and Qualitative Disclosures about Market Risk” in the Company’s 2020 Form 10 K filed on February 25, 2021
---- CONTROLS_AND_PROCEDURES -----
+ Management of the Company, with the participation of its Chief Executive Officer and Chief Financial Officer, has evaluated the effectiveness of the Company’s disclosure controls and procedures over financial reporting. Based on their evaluation, the Company’s Chief Executive Officer and Chief Financial Officer have concluded that as of June 30, 2021, the Company’s disclosure controls and procedures are effective. For the second quarter of 2021, there were no changes in internal control over financial reporting that materially affected, or are reasonably likely to materially affect, the Company’s internal control over financial reporting
