In [45]:
from Edgar_Functions import *
import requests
import pandas as pd
from bs4 import BeautifulSoup
import numpy as np
import logging
import calendar



In [46]:
# Request headers handed to the Edgar helper calls in the cells below.
headers = {'User-Agent': 'feb2126@columbia.edu'}
# Ticker symbol of the company whose filings this notebook inspects.
ticker = 'PPG'


def display_float_format(x):
    """Render a float for DataFrame display.

    Whole-number values are shown without decimals and everything else with
    two decimals, both using thousands separators.

    Non-finite values (``inf``, ``-inf``, ``nan``) take the two-decimal branch
    and render as ``inf`` / ``-inf`` / ``nan``. The previous ``int(x) == x``
    check raised ``OverflowError`` / ``ValueError`` on them.
    """
    # float.is_integer() is False for inf/nan and never raises, unlike int(x).
    if float(x).is_integer():
        return "{:,.0f}".format(x)
    return "{:,.2f}".format(x)


pd.options.display.float_format = display_float_format

**SEC Filing Statement Keys**

In [47]:

# Lower-case report titles grouped by statement kind. The map is passed to
# get_statement_soup below; presumably it is matched against the report
# titles found in a filing summary -- confirm in Edgar_Functions.
statement_keys_map = dict(
    # Titles under which the balance sheet may appear.
    balance_sheet=[
        "balance sheet",
        "balance sheets",
        "statement of financial position",
        "consolidated balance sheets",
        "consolidated balance sheet",
        "consolidated financial position",
        "consolidated balance sheets - southern",
        "consolidated statements of financial position",
        "consolidated statement of financial position",
        "consolidated statements of financial condition",
        "combined and consolidated balance sheet",
        "condensed consolidated balance sheets",
        "consolidated balance sheets, as of december 31",
        "dow consolidated balance sheets",
        "consolidated balance sheets (unaudited)",
    ],
    # Titles under which the income statement may appear.
    income_statement=[
        "income statement",
        "income statements",
        "statement of earnings (loss)",
        "statements of consolidated income",
        "consolidated statements of operations",
        "consolidated statement of operations",
        "consolidated statements of earnings",
        "consolidated statement of earnings",
        "consolidated statements of income",
        "consolidated statement of income",
        "consolidated income statements",
        "consolidated income statement",
        "condensed consolidated statements of earnings",
        "consolidated results of operations",
        "consolidated statements of income (loss)",
        "consolidated statements of income - southern",
        "consolidated statements of operations and comprehensive income",
        "consolidated statements of comprehensive income",
    ],
    # Titles under which the cash flow statement may appear.
    cash_flow_statement=[
        "cash flows statement",
        "cash flows statements",
        "statement of cash flows",
        "statements of consolidated cash flows",
        "consolidated statements of cash flows",
        "consolidated statement of cash flows",
        "consolidated statement of cash flow",
        "consolidated cash flows statements",
        "consolidated cash flow statements",
        "condensed consolidated statements of cash flows",
        "consolidated statements of cash flows (unaudited)",
        "consolidated statements of cash flows - southern",
    ],
)


**Setup**

In [48]:
# Only 10-K forms are requested from the filings lookup.
form_types = ["10-K"]
fillings = get_filtered_filings(
    ticker,
    form_types=form_types,
    just_accession_number=True,
)

# Take the first accession number returned and strip its dashes.
# Presumably the first entry is the most recent filing -- confirm the
# ordering in get_filtered_filings.
acc_num = fillings.iloc[0].replace("-", "")
acc_num

'000007987925000034'

**Get Statement File Names Based on Most Recent Filing**

In [49]:
# Show which report file (R*.htm) holds each statement of the selected filing;
# the result maps a lower-case statement title to its file name.
get_statement_file_names_in_filing_summary(
    ticker,
    acc_num,
    headers=headers,
)

{'consolidated statement of income': 'R3.htm',
 'consolidated statement of comprehensive income': 'R4.htm',
 'consolidated balance sheet': 'R5.htm',
 "consolidated statement of shareholders' equity": 'R6.htm',
 'consolidated statement of cash flows': 'R7.htm'}

**Get Text Data From Filings**

In [50]:
# Fetch the parsed balance-sheet report page of the selected filing.
soup = get_statement_soup(ticker, acc_num, "balance_sheet", headers, statement_keys_map)

# Display the parsed document as the cell output.
soup

<html><body><document>
<type>XML
<sequence>41
<filename>R5.htm
<description>IDEA: XBRL DOCUMENT
<text>
<title></title>
<link href="include/report.css" rel="stylesheet" type="text/css"/>
<script src="Show.js" type="text/javascript">/* Do Not Remove This Comment */</script><script type="text/javascript">
							function toggleNextSibling (e) {
							if (e.nextSibling.style.display=='none') {
							e.nextSibling.style.display='block';
							} else { e.nextSibling.style.display='none'; }
							}</script>
<span style="display: none;">v3.25.0.1</span><table border="0" cellspacing="2" class="report" id="idm45773583086944">
<tr>
<th class="tl" colspan="1" rowspan="1"><div style="width: 200px;"><strong>Consolidated Balance Sheet - USD ($)<br/> $ in Millions</strong></div></th>
<th class="th"><div>Dec. 31, 2024</div></th>
<th class="th"><div>Dec. 31, 2023</div></th>
</tr>
<tr class="re">
<td class="pl" style="border-bottom: 0px;" valign="top"><a class="a" href="javascript:void(0);" onclick="

**Pulling Data From the Soup**

In [51]:
# Label lookup passed to rename_statement in the statement cells below.
# Presumably maps XBRL tags to human-readable labels -- confirm in
# Edgar_Functions.
label_dict = get_label_dictionary(
    ticker,
    headers,
)


**Income Statement**

In [52]:
# Build the income statement for the selected filing, then display it with
# its row labels run through rename_statement.
statement = process_one_statement(
    ticker,
    acc_num,
    "income_statement",
)
rename_statement(statement, label_dict)

Unnamed: 0,2024-12-31,2023-12-31,2022-12-31
us-gaap_IncomeStatementAbstract,,,
"Revenue from Contract with Customer, Excluding Assessed Tax",15845000.0,16242000.0,15614000.0
Cost of Goods and Services Sold,9252000.0,9678000.0,9975000.0
"Selling, General and Administrative Expense",3391000.0,3401000.0,3037000.0
Depreciation,360000.0,360000.0,357000.0
Amortization of Intangible Assets,132000.0,154000.0,145000.0
Research and Development Expense (Excluding Acquired in Process Cost),423000.0,424000.0,434000.0
,241000.0,247000.0,167000.0
"Investment Income, Interest",-177000.0,-140000.0,-54000.0
Restructuring Charges,233000.0,-2000.0,33000.0


**Balance Sheet**

In [53]:
# Build the balance sheet for the selected filing, then display it with its
# row labels run through rename_statement.
# NOTE(review): the report page header reads "$ in Millions", yet the table
# below shows e.g. 1270000.0 for cash, which looks like thousands of USD --
# confirm the unit scaling applied inside process_one_statement.
statement = process_one_statement(
    ticker,
    acc_num,
    "balance_sheet",
)
rename_statement(statement, label_dict)

Unnamed: 0,2024-12-31,2023-12-31
us-gaap_AssetsCurrentAbstract,,
"Cash and Cash Equivalents, at Carrying Value",1270000.0,1493000.0
Short-term Investments,88000.0,75000.0
"Receivables, Net, Current",2985000.0,3007000.0
"Inventory, Net",1846000.0,1934000.0
"Other Assets, Current",368000.0,922000.0
"Assets, Current",6557000.0,7431000.0
"Property, Plant and Equipment, Net",3464000.0,3450000.0
Goodwill,5690000.0,6115000.0
"Intangible Assets, Net (Excluding Goodwill)",1922000.0,2261000.0


**Cash Flow Statement**

In [54]:
# Build the cash flow statement for the selected filing, then display it with
# its row labels run through rename_statement.
statement = process_one_statement(
    ticker,
    acc_num,
    "cash_flow_statement",
)
rename_statement(statement, label_dict)

Unnamed: 0,2024-12-31,2023-12-31,2022-12-31
us-gaap_NetCashProvidedByUsedInOperatingActivitiesAbstract,,,
"Income (Loss) from Continuing Operations, Net of Tax, Including Portion Attributable to Noncontrolling Interest",1377000.0,1262000.0,1035000.0
"Depreciation, Depletion and Amortization, Nonproduction",492000.0,514000.0,502000.0
ppg_NoncashPensionSettlementChargebeforetax,0.0,190000.0,0.0
Asset Impairment Charges,146000.0,160000.0,231000.0
"Share-based Payment Arrangement, Noncash Expense",42000.0,56000.0,34000.0
Deferred Income Tax Expense (Benefit),-97000.0,-187000.0,-151000.0
Restructuring Charges,233000.0,-2000.0,33000.0
Payment for Pension Benefits,-26000.0,-46000.0,-11000.0
Payments for Restructuring,-52000.0,-56000.0,-85000.0
