In [5]:
# Import packages
import os
from urllib.parse import quote_plus

import edgar
import pandas as pd
from dotenv import load_dotenv
from edgar.xbrl import XBRLS

from etl.utils.utils import extract_query

# Load .env
# Read the contact e-mail and database settings from the environment so that
# no credentials appear in the notebook itself.
load_dotenv()
email = os.getenv("EMAIL")
db_username = os.getenv("DB_USERNAME")
db_password = os.getenv("DB_PASSWORD")
db_host = os.getenv("DB_HOST")
db_port = os.getenv("DB_PORT")

# Fail fast with a readable message: os.getenv returns None for an unset
# variable, which would otherwise be interpolated below as the text "None".
# Only "unset" is rejected; an empty string (e.g. a blank password) is allowed.
_missing_db_vars = [
    name
    for name, value in (
        ("DB_USERNAME", db_username),
        ("DB_PASSWORD", db_password),
        ("DB_HOST", db_host),
        ("DB_PORT", db_port),
    )
    if value is None
]
if _missing_db_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_db_vars)
    )

# Create db connection string
# Percent-encode the credentials so that reserved URL characters inside them
# (such as "@", ":" or "/") are not mistaken for URL delimiters.
# NOTE(review): assumes extract_query passes this string to SQLAlchemy, which
# decodes percent-escapes in the user/password part -- confirm.
db_conn_str = (
    f"mysql+pymysql://{quote_plus(db_username)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/stocks_etl_ml_db"
)

In [2]:
# SQL that pulls every column and row of the sp_constituents table
ticker_query = (
    "\n"
    "SELECT *\n"
    "FROM sp_constituents\n"
)

# Run the query through the project's ETL helper and keep the result for the
# cells that follow
df_tickers = extract_query(db_conn_str, ticker_query)

Connection Successful!
SQL script executed successfully!


In [3]:
# Preview the loaded constituents table (the rendered output is truncated
# to the first and last rows).
df_tickers

Unnamed: 0,Actual_Date,Load_Date,Tickers
0,2018-04-24,2018-05-01,A
1,2018-04-24,2018-05-01,AAL
2,2018-04-24,2018-05-01,AAP
3,2018-04-24,2018-05-01,AAPL
4,2018-04-24,2018-05-01,ABBV
...,...,...,...
501,2018-04-24,2018-05-01,XYL
502,2018-04-24,2018-05-01,YUM
503,2018-04-24,2018-05-01,ZBH
504,2018-04-24,2018-05-01,ZION


In [4]:
# Set identity for SEC requirement
# `email` is the value of the EMAIL environment variable loaded earlier in
# the notebook.
edgar.set_identity(email)

In [11]:
# Get data from one sample company
# NOTE: this is the fixed row with index label 250, not a random draw.
SAMPLE_ROW_LABEL = 250
comp = df_tickers.loc[SAMPLE_ROW_LABEL]

ticker = comp['Tickers']
# Normalise to a pandas Timestamp so the offset arithmetic and strftime calls
# below behave the same whether the column came back from the database as
# Timestamps, datetime.date objects or ISO-formatted strings.
end_date = pd.Timestamp(comp['Load_Date'])

# Set date range
# One-year window ending on (and including) end_date: step back a full year,
# then forward one day so the window does not include the same calendar day
# of the previous year.
start_date = (end_date - pd.DateOffset(years=1)) + pd.Timedelta(days=1)

# "<start>:<end>" string, later passed as the filing_date filter
date_range = f"{start_date.strftime('%Y-%m-%d')}:{end_date.strftime('%Y-%m-%d')}"
print(date_range)

2017-05-02:2018-05-01


In [14]:
# Create company with edgar
# Built from the ticker of the sampled row; the resulting object is what the
# filings are requested from further down.
company = edgar.Company(ticker)

In [15]:
# Ask the company object for its 10-K filings, restricted to the date window
# built earlier and to filings flagged as XBRL
filings = company.get_filings(
    form="10-K",
    filing_date=date_range,
    is_xbrl=True,
)

In [22]:
# Select the filing to analyse
# Guard first: if no 10-K matched the date window, indexing would fail with an
# unhelpful error, so report which ticker / window came back empty instead.
if filings is None or len(filings) == 0:
    raise ValueError(
        f"No XBRL 10-K filings found for {ticker} in date range {date_range}"
    )

# Take the first filing in the result set
# (presumably the most recent one -- verify the ordering returned by edgar).
filing = filings[0]

# Get a few key dates
filing_date = filing.filing_date
filing_period = filing.period_of_report

In [34]:
# Get financials
# Parse xbrl
xbrl = filing.xbrl()

# Stop here with a clear message if no XBRL document could be obtained,
# rather than passing None into Financials and failing later.
if xbrl is None:
    raise ValueError(
        f"Filing dated {filing.filing_date} for {ticker} has no parsable XBRL data"
    )

# Extract standardized financials
financials = edgar.Financials(xbrl)

# The three primary statements, each inspected in its own cell below
balance_sheet = financials.balance_sheet()
income_statement = financials.income_statement()
cash_flow_statement = financials.cashflow_statement()

In [37]:
# Show the balance sheet as a DataFrame: one row per line item, one value
# column per reported period end, plus concept/label metadata columns.
balance_sheet.to_dataframe()

Unnamed: 0,concept,label,2017-12-31,2016-12-31,level,abstract,dimension
0,us-gaap_AssetsAbstract,ASSETS,,,1,False,False
1,us-gaap_AssetsCurrentAbstract,Current assets:,,,2,False,False
2,us-gaap_CashAndCashEquivalentsAtCarryingValue,Cash and Cash Equivalents,959000000.0,1198000000.0,3,False,False
3,us-gaap_ReceivablesNetCurrent,Accounts Receivable,1993000000.0,1707000000.0,3,False,False
4,us-gaap_PrepaidExpenseCurrent,Prepaid Expenses,146000000.0,123000000.0,3,False,False
5,us-gaap_IncomeTaxesReceivable,Income taxes receivable,47000000.0,34000000.0,3,False,False
6,us-gaap_ShortTermInvestments,"Investments in debt, equity and other securities",46000000.0,40000000.0,3,False,False
7,us-gaap_OtherAssetsCurrent,Other Current Assets,259000000.0,235000000.0,3,False,False
8,us-gaap_AssetsCurrent,Total Current Assets,3450000000.0,3337000000.0,3,False,False
9,us-gaap_PropertyPlantAndEquipmentNet,"Property, Plant and Equipment",440000000.0,406000000.0,2,False,False


In [33]:
# Show the income statement as a DataFrame: one row per line item, one value
# column per reported period end, plus concept/label metadata columns.
income_statement.to_dataframe()

Unnamed: 0,concept,label,2017-12-31,2016-12-31,2015-12-31,level,abstract,dimension
0,us-gaap_SalesRevenueServicesNet,Revenues,8060000000.0,5364000000.0,4326000000.0,1,False,False
1,us-gaap_ReimbursementRevenue,Reimbursed expenses,1679000000.0,1514000000.0,1411000000.0,1,False,False
2,us-gaap_Revenues,Revenue,9739000000.0,6878000000.0,5737000000.0,1,False,False
3,us-gaap_CostOfServicesExcludingDepreciationDep...,"Costs of revenue, exclusive of depreciation an...",-4622000000.0,-3236000000.0,-2705000000.0,1,False,False
4,us-gaap_CostOfReimbursableExpense,"Costs of revenue, reimbursed expenses",-1679000000.0,-1514000000.0,-1411000000.0,1,False,False
5,us-gaap_SellingGeneralAndAdministrativeExpense,"Selling, General and Administrative Expense",-1605000000.0,-1011000000.0,-815000000.0,1,False,False
6,us-gaap_DepreciationDepletionAndAmortization,Depreciation and amortization,-1011000000.0,-289000000.0,-128000000.0,1,False,False
7,us-gaap_RestructuringCharges,Restructuring costs,-63000000.0,-71000000.0,-30000000.0,1,False,False
8,us-gaap_BusinessCombinationIntegrationRelatedC...,Merger related costs,,-87000000.0,,1,False,False
9,us-gaap_AssetImpairmentCharges,Impairment charges,-40000000.0,-28000000.0,-2000000.0,1,False,False


In [38]:
# Show the cash flow statement as a DataFrame: one row per line item, one
# value column per reported period end, plus concept/label metadata columns.
cash_flow_statement.to_dataframe()

Unnamed: 0,concept,label,2017-12-31,2016-12-31,2015-12-31,level,abstract,dimension
0,us-gaap_NetCashProvidedByUsedInOperatingActivi...,Operating activities:,,,,1,False,False
1,us-gaap_ProfitLoss,Profit or Loss,1328000000.0,130000000.0,388000000.0,2,False,False
2,us-gaap_AdjustmentsNoncashItemsToReconcileNetI...,Adjustments to reconcile net income to cash pr...,,,,2,False,False
3,us-gaap_DepreciationDepletionAndAmortization,Depreciation and amortization,-1011000000.0,-289000000.0,-128000000.0,3,False,False
4,us-gaap_AmortizationOfFinancingCostsAndDiscounts,Amortization of debt issuance costs and discount,9000000.0,30000000.0,9000000.0,3,False,False
5,iqv_AmortizationOfAccumulatedOtherComprehensiv...,Amortization of accumulated other comprehensiv...,3000000.0,3000000.0,8000000.0,3,False,False
6,us-gaap_ShareBasedCompensation,Stock-based compensation,106000000.0,80000000.0,38000000.0,3,False,False
7,us-gaap_AssetImpairmentCharges,Impairment charges,-40000000.0,-28000000.0,-2000000.0,3,False,False
8,us-gaap_GainLossOnSaleOfPropertyPlantEquipment,"Gain on disposals of property and equipment, net",-1000000.0,-1000000.0,-1000000.0,3,False,False
9,us-gaap_IncomeLossFromEquityMethodInvestmentsN...,(Earnings) loss from unconsolidated affiliates,-10000000.0,8000000.0,-8000000.0,3,False,False
