---
# Data Ingestion
---
Notebook: 0.1.0-iw-data-ingestion.ipynb<br>
Dependencies: requirements.txt<br>
Date: 2019-OCT-3<br>
Abstract:
```
    The goal of this notebook is to ingest stock data from the Financial Modeling Prep API and combine it into a single dataframe. This dataframe is then exported to the raw (unprocessed) data directory.
```

---
## 0. Setup
---

In [57]:
import pandas as pd

import fmpclient
from fmpclient import FMPClient
# Client instance used for every API request in this notebook.
api = FMPClient()
# Show the installed fmpclient version so the run can be reproduced later.
fmpclient.__version__

'0.1.3'

---
## 1. Pull data from API
---

In [58]:
# Symbols to ingest; they are also joined with '.' to name the exported CSV files.
# NOTE(review): later cells read each response as a single-company payload
# (e.g. res_income_statements['financials'] at the top level) - confirm the
# client returns the same shape before adding more tickers.
tickers = ['TRP']

In [59]:
# Alias the client's `company_valuation` namespace so each call below stays short.
valuation = api.company_valuation

# Raw responses, one per endpoint. The call order is kept as-is so the request
# log printed by the client stays in the same order.
res_income_statements = valuation.income_statement(tickers, period='quarter')
res_financial_ratios = valuation.financial_ratios(tickers)
res_balance_sheet = valuation.balance_sheet_statement(tickers, period='quarter')
res_cash_flow = valuation.cash_flow_statement(tickers, period='quarter')
res_enterprise_value = valuation.enterprise_value(tickers, period='quarter')
res_historical_dcf = valuation.historical_discounted_cash_flow_value(tickers, period='quarter')
res_dcf = valuation.discounted_cash_flow_value(tickers)
res_metrics = valuation.key_metrics(tickers, period='quarter')
res_profile = valuation.profile(tickers)

url: https://financialmodelingprep.com/api/v3/, request_url: https://financialmodelingprep.com/api/v3/financials/income-statement/TRP, path: financials/income-statement/TRP
url: https://financialmodelingprep.com/api/v3/, request_url: https://financialmodelingprep.com/api/v3/financial-ratios/TRP, path: financial-ratios/TRP
url: https://financialmodelingprep.com/api/v3/, request_url: https://financialmodelingprep.com/api/v3/financials/balance-sheet-statement/TRP, path: financials/balance-sheet-statement/TRP
url: https://financialmodelingprep.com/api/v3/, request_url: https://financialmodelingprep.com/api/v3/financials/cash-flow-statement/TRP, path: financials/cash-flow-statement/TRP
url: https://financialmodelingprep.com/api/v3/, request_url: https://financialmodelingprep.com/api/v3/enterprise-value/TRP, path: enterprise-value/TRP
url: https://financialmodelingprep.com/api/v3/, request_url: https://financialmodelingprep.com/api/v3/company/historical-discounted-cash-flow/TRP, path: compan

#### Create DataFrame for the Income Statements

In [60]:
# Build the frame in one constructor call: growing it row-by-row with
# DataFrame.append copies the whole frame on every iteration, and append is
# no longer available in pandas >= 2.0.
income_records = res_income_statements['financials']

# One row per reported statement, indexed by its 'date' value. The 'date'
# field is also kept as a regular column.
df_income_statements = pd.DataFrame(
    income_records,
    index=[record['date'] for record in income_records],
)
df_income_statements.head()

Unnamed: 0,date,Revenue,Revenue Growth,Cost of Revenue,Gross Profit,R&D Expenses,SG&A Expense,Operating Expenses,Operating Income,Interest Expense,...,Gross Margin,EBITDA Margin,EBIT Margin,Profit Margin,Free Cash Flow margin,EBITDA,EBIT,Consolidated Income,Earnings Before Tax Margin,Net Profit Margin
2019-03-31,2019-03-31,3487000000.0,0.0184,,1698000000.0,,,1480038948.3934,1511000000.0,-438918433.0762,...,0.487,,0.3246,,,,1131750430.6793,,0.3963,
2018-12-31,2018-12-31,3904000000.0,,,1963000000.0,,,1511482867.4151,1823000000.0,-442438917.0152,...,0.5028,,0.3466,,,,1352997285.2007,,0.1939,
2018-09-30,2018-09-30,3156000000.0,,,1390000000.0,,,1454699147.0068,1263000000.0,-443402751.0951,...,0.4404,,0.3075,,,,970567893.6448,,0.3638,
2018-06-30,2018-06-30,3195000000.0,,,1479000000.0,,,1410237052.6952,1327000000.0,-421259248.0749,...,0.4629,,0.3136,,,,1001811867.7337,,0.3302,


#### Create DataFrame for the Financial Ratios

In [61]:
def _ratio_group_frame(ratio_reports, group):
    """Return one row per report for a single ratio group, indexed by report date.

    Parameters
    ----------
    ratio_reports : list of dict
        Entries of ``res_financial_ratios['ratios']``; each entry carries a
        'date' value and one nested dict per ratio group.
    group : str
        Key of the nested ratio dict to extract (e.g. 'debtRatios').

    Returns
    -------
    pandas.DataFrame
        Columns are the ratio names of ``group``; the index holds each
        report's 'date' value.

    Raises
    ------
    KeyError
        If a report lacks 'date' or the requested group.
    """
    # A single constructor call replaces the per-row DataFrame.append loop,
    # which is quadratic and no longer available in pandas >= 2.0.
    return pd.DataFrame(
        [report[group] for report in ratio_reports],
        index=[report['date'] for report in ratio_reports],
    )


ratio_reports = res_financial_ratios['ratios']

# One frame per ratio group returned by the financial-ratios endpoint.
df_ratios = _ratio_group_frame(ratio_reports, 'investmentValuationRatios')
df_debtratios = _ratio_group_frame(ratio_reports, 'debtRatios')
df_cashFlowIndicatorRatios = _ratio_group_frame(ratio_reports, 'cashFlowIndicatorRatios')
df_profitabilityIndicatorRatios = _ratio_group_frame(ratio_reports, 'profitabilityIndicatorRatios')
df_operatingPerformanceRatios = _ratio_group_frame(ratio_reports, 'operatingPerformanceRatios')
df_liquidityMeasurementRatios = _ratio_group_frame(ratio_reports, 'liquidityMeasurementRatios')

In [62]:
# Preview the first row of the investment-valuation ratios.
df_ratios.head(1)

Unnamed: 0,priceBookValueRatio,priceToBookRatio,priceToSalesRatio,priceEarningsRatio,receivablesTurnover,priceToFreeCashFlowsRatio,priceToOperatingCashFlowsRatio,priceCashFlowRatio,priceEarningsToGrowthRatio,priceSalesRatio,dividendYield,enterpriseValueMultiple,priceFairValue
2018-12-31,,,2.3002,12.2947,5.2515,0.0,6.7495,4.8519068497326,8.5910992436515,2.3250419913735,0.049395914999966,8.7172533318092,1.0840633103824


In [63]:
# Preview the first row of the debt ratios.
df_debtratios.head(1)

Unnamed: 0,debtRatio,debtEquityRatio,longtermDebtToCapitalization,totalDebtToCapitalization,interestCoverage,cashFlowToDebtRatio,companyEquityMultiplier
2018-12-31,0.5079,1.7125,0.6000545293436,0.63133489991078,2.8252,0.13047112915746,3.3717363146772


In [64]:
# Preview the first row of the cash-flow indicator ratios.
df_cashFlowIndicatorRatios.head(1)

Unnamed: 0,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,payoutRatio,receivablesTurnover,operatingCashFlowSalesRatio,freeCashFlowOperatingCashFlowRatio,cashFlowCoverageRatios,shortTermCoverageRatios,capitalExpenditureCoverageRatios,dividendpaidAndCapexCoverageRatios,dividendPayoutRatio
2018-12-31,9.8112,-4.3241,0.6676,0.5348,5.2515,0.47920169603042,-0.44073226544622,0.13047112915746,1.0531812339332,0.6940914866582,0.59509759418974,0.42436520799568


In [65]:
# Preview the first row of the profitability indicator ratios.
df_profitabilityIndicatorRatios.head(1)

Unnamed: 0,niperEBT,ebtperEBIT,ebitperRevenue,grossProfitMargin,operatingProfitMargin,pretaxProfitMargin,netProfitMargin,effectiveTaxRate,returnOnAssets,returnOnEquity,returnOnCapitalEmployed,nIperEBT,eBTperEBIT,eBITperRevenue
2018-12-31,0.8955007256894,0.64603844350679,0.46779735360772,0.62870092843044,1,0.35675122450471,0.27063381826157,0.1044992743106,0.0662,0.1262,0.0339,0.8955007256894,0.64603844350679,0.46779735360772


In [66]:
# Preview the first row of the operating-performance ratios.
df_operatingPerformanceRatios.head(1)

Unnamed: 0,receivablesTurnover,payablesTurnover,inventoryTurnover,fixedAssetTurnover,assetTurnover
2018-12-31,5.2515,1.0855,32.8876,0.20568996887358,0.13828346138294


In [67]:
# Preview the first row of the liquidity-measurement ratios.
df_liquidityMeasurementRatios.head(1)

Unnamed: 0,currentRatio,quickRatio,cashRatio,daysOfSalesOutstanding,daysOfInventoryOutstanding,operatingCycle,daysOfPayablesOutstanding,cashConversionCycle
2018-12-31,0.2941,0.23026417426233,0.034450795612548,-30.9736,67.642,,144.3029,


#### Create DataFrame for Balance Sheet

In [68]:
# Build the frame in one constructor call: growing it row-by-row with
# DataFrame.append copies the whole frame on every iteration, and append is
# no longer available in pandas >= 2.0.
balance_sheet_records = res_balance_sheet['financials']

# One row per reported balance sheet, indexed by its 'date' value. The 'date'
# field is also kept as a regular column.
df_balance_sheet = pd.DataFrame(
    balance_sheet_records,
    index=[record['date'] for record in balance_sheet_records],
)
df_balance_sheet.head()

Unnamed: 0,date,Cash and cash equivalents,Short-term investments,Cash and short-term investments,Receivables,Inventories,Total current assets,"Property, Plant & Equipment Net",Goodwill and Intangible Assets,Long-term investments,...,Deposit Liabilities,Total non-current liabilities,Total liabilities,Other comprehensive income,Retained earnings (deficit),Total shareholders equity,Investments,Net Debt,Other Assets,Other Liabilities
2019-03-31,2019-03-31,653134596.6594,0.0,653134596.6594,2214000000.0,407000000.0,,67520000000.0,1363942775.822,5243052954.8348,...,,,68050000000.0,,2732000000.0,25657000000.0,-18725189.1244,43942565126.2078,1630865403.3406,7747300277.1328
2018-12-31,2018-12-31,446000000.0,0.0,446000000.0,2694000000.0,431000000.0,,66503000000.0,0.0,5263042042.7031,...,,,67605000000.0,,2397000000.0,25358000000.0,-42556313.7428,46113372881.3559,1564000000.0,6508627118.6441
2018-09-30,2018-09-30,846076999.9232,0.0,846076999.9232,2170000000.0,381000000.0,,63212000000.0,1418581418.5814,5199415968.6467,...,,,64240000000.0,,1880000000.0,23934000000.0,-181357104.434,42656865826.4812,1715923000.0768,5790057173.5956
2018-06-30,2018-06-30,1185263475.7663,0.0,1185263475.7663,2111000000.0,403000000.0,,61446000000.0,10368413105.8433,5049826362.6755,...,,,63267000000.0,,1577000000.0,23283000000.0,-138909859.5803,42809171221.5008,1730736524.2337,4676565302.7329


#### Create DataFrame for the Cash Flows

In [69]:
# Build the frame in one constructor call: growing it row-by-row with
# DataFrame.append copies the whole frame on every iteration, and append is
# no longer available in pandas >= 2.0.
cash_flow_records = res_cash_flow['financials']

# One row per reported cash-flow statement, indexed by its 'date' value. The
# 'date' field is also kept as a regular column.
df_cash_flow = pd.DataFrame(
    cash_flow_records,
    index=[record['date'] for record in cash_flow_records],
)
df_cash_flow.head()

Unnamed: 0,date,Depreciation & Amortization,Stock-based compensation,Operating Cash Flow,Capital Expenditure,Acquisitions and disposals,Investment purchases and sales,Investing Cash flow,Issuance (repayment) of debt,Issuance (buybacks) of shares,Dividend payments,Financing Cash Flow,Effect of forex changes on cash,Net cash flow / Change in cash,Free Cash Flow,Net Cash/Marketcap
2019-03-31,2019-03-31,608000000.0,,1949000000.0,-2186000000.0,,,-2237000000.0,874840835.8924,50932514.4184,-343794472.3242,721000000.0,-7000000.0,426000000.0,,-1.0375
2018-12-31,2018-12-31,681000000.0,,6676000000.0,-9914000000.0,,,-10140000000.0,371267150.9282,0.0,-335314403.111,2748000000.0,73000000.0,-643000000.0,,-1.4071
2018-09-30,2018-09-30,564000000.0,,4516000000.0,-6713000000.0,,,-7292000000.0,933681702.9125,0.0,-350418811.9573,2741000000.0,47000000.0,12000000.0,,-1.1276
2018-06-30,2018-06-30,1105000000.0,,3217000000.0,-4151000000.0,,,-4478000000.0,834214102.3705,0.0,-316321908.5007,1685000000.0,57000000.0,481000000.0,,-1.115
2018-03-31,2018-03-31,535000000.0,,1412000000.0,-1738000000.0,,,-1865000000.0,,,,602000000.0,29000000.0,178000000.0,,-1.115


#### Create DataFrame for the Enterprise Values

In [70]:
# Build the frame in one constructor call: growing it row-by-row with
# DataFrame.append copies the whole frame on every iteration, and append is
# no longer available in pandas >= 2.0.
enterprise_value_records = res_enterprise_value['enterpriseValues']

# One row per enterprise-value entry, indexed by its 'date' value. The 'date'
# field is also kept as a regular column.
df_enterprise_value = pd.DataFrame(
    enterprise_value_records,
    index=[record['date'] for record in enterprise_value_records],
)
df_enterprise_value.head()

Unnamed: 0,date,Stock Price,Number of Shares,Market Capitalization,- Cash & Cash Equivalents,+ Total Debt,Enterprise Value
2019-03-31,2019-03-31,45.84,,0.0,653134600.0,44595700000.0,43942570000.0
2018-12-31,2018-12-31,35.2597,,0.0,446000000.0,46559370000.0,46113370000.0
2018-09-30,2018-09-30,40.3076,,0.0,846077000.0,43502940000.0,42656870000.0
2018-06-30,2018-06-30,40.8189,,0.0,1185263000.0,43994430000.0,42809170000.0


#### Create DataFrame for Historical Discounted Cash Flow Values

In [71]:
# Historical DCF entries: one row per entry, indexed by its 'date' value.
# Built in one constructor call because row-by-row DataFrame.append is
# quadratic and no longer available in pandas >= 2.0.
historical_dcf_records = res_historical_dcf['historicalDCF']
df_historical_dcf = pd.DataFrame(
    historical_dcf_records,
    index=[record['date'] for record in historical_dcf_records],
)

# The current DCF response is a single record; turn it into a one-row frame
# keyed by its own 'date' value.
df_dcf = pd.DataFrame.from_dict({res_dcf['date']: res_dcf}, orient='index')

# Append the current value after the historical rows, keeping only the three
# columns that are selected from the current-DCF frame.
df_historical_dcf = pd.concat(
    [df_historical_dcf, df_dcf[['date', 'Stock Price', 'DCF']]]
)

df_historical_dcf.head()

Unnamed: 0,date,Stock Price,DCF
2019-03-31,2019-03-31,45.84,0.0
2018-12-31,2018-12-31,35.2597,0.0
2018-09-30,2018-09-30,40.3076,0.0
2018-06-30,2018-06-30,40.8189,0.0
2019-10-05,2019-10-05,51.405,51.405


#### Create DataFrame for Key Metrics

In [72]:
# Build the frame in one constructor call: growing it row-by-row with
# DataFrame.append copies the whole frame on every iteration, and append is
# no longer available in pandas >= 2.0.
metric_records = res_metrics['metrics']

# One row per key-metrics entry, indexed by its 'date' value. The 'date'
# field is also kept as a regular column.
df_metrics = pd.DataFrame(
    metric_records,
    index=[record['date'] for record in metric_records],
)
df_metrics.head()

Unnamed: 0,date,Revenue per Share,Net Income per Share,Operating Cash Flow per Share,Free Cash Flow per Share,Cash per Share,Book Value per Share,Tangible Book Value per Share,Shareholders Equity per Share,Interest Debt per Share,...,Graham Number,Graham Net-Net,Working Capital,Tangible Asset Value,Net Current Asset Value,Invested Capital,Average Receivables,Average Payables,Average Inventory,Capex per Share
2019-03-31,2019-03-31,,,,,,,,,,...,,-1.4908,,7456370309.3401,-63145000000.0,,2211000000.0,3822500000.0,395500000.0,
2018-12-31,2018-12-31,,,,,,,,,,...,,-1.9062,,8203096338.6896,-62470000000.0,,,,,
2018-09-30,2018-09-30,,,,,,,,,,...,,-1.5629,,6596480442.6343,-59127000000.0,,,,,
2018-06-30,2018-06-30,,,,,,,,,,...,,-1.5065,,5936131662.3886,-57837000000.0,,,,,
2018-03-31,2018-03-31,,,,,,,,,,...,,-1.5631,,,-56253000000.0,,,,,


#### Create DataFrame for Profile

In [73]:
# The profile response holds a single record, so construct the one-row frame
# directly (indexed by the ticker symbol) instead of appending to an empty
# frame; DataFrame.append is no longer available in pandas >= 2.0.
df_profile = pd.DataFrame(
    [res_profile['profile']],
    index=[res_profile['symbol']],
)
df_profile.head()

Unnamed: 0,price,beta,volAvg,mktCap,lastDiv,range,changes,changesPercentage,companyName,exchange,industry,website,description,ceo,sector,image
TRP,51.38,0.887682,1559821,34329451851.85,2.24202,34.58-46.63,0.03,(+0.06%),TransCanada Corporation,New York Stock Exchange,Oil & Gas - Midstream,http://www.transcanada.com,TransCanada Corp is an energy infrastructure c...,Russell K. Girling,Energy,https://financialmodelingprep.com/images-New-j...


---
## 2. Export
---

#### Merge the DataFrames

In [74]:
# Frames to combine into the exported aggregate, in merge order.
# NOTE(review): df_income_statements is built earlier in this notebook but is
# not part of this list - confirm whether leaving it out is intentional.
dfs = [
    # Financial-ratio groups
    df_ratios,
    df_debtratios,
    df_cashFlowIndicatorRatios,
    df_profitabilityIndicatorRatios,
    df_operatingPerformanceRatios,
    df_liquidityMeasurementRatios,
    # Statements, valuation and key metrics
    df_balance_sheet,
    df_cash_flow,
    df_enterprise_value,
    df_historical_dcf,
    df_metrics,
]

In [75]:
# Fold the frames together left to right with an outer join on the index, so
# an index value present in any frame keeps a row in the aggregate.
# NOTE(review): several inputs share column names ('date', 'receivablesTurnover',
# 'Stock Price'); colliding names get pandas' default '_x'/'_y' suffixes, and
# repeated collisions can produce duplicate labels - presumably an error on
# newer pandas releases; confirm before upgrading.
frames = iter(dfs)
df_agg = next(frames, None)
for frame in frames:
    df_agg = df_agg.merge(frame, how='outer', left_index=True, right_index=True)
df_agg.shape

(15, 164)

In [76]:
# Transposed view: one row per column of the aggregate, one column per index value.
df_agg.T

Unnamed: 0,2009-12-31,2010-12-31,2011-12-31,2012-12-31,2013-12-31,2014-12-31,2015-12-31,2016-12-31,2017-12-31,2018-03-31,2018-06-30,2018-09-30,2018-12-31,2019-03-31,2019-10-05
priceBookValueRatio,,,,,,,,,,,,,,,
priceToBookRatio,,,,,,,,,,,,,,,
priceToSalesRatio,2.5899,3.7993,3.8688,4.1597,3.6616,3.4104,2.0343,2.5267,3.1682,,,,2.3002,,
priceEarningsRatio,17.0706,21.1875,20.5152,25.5632,20.0756,23.1932,0.0,379.1597,18.2548,,,,12.2947,,
receivablesTurnover_x,7.9214,6.2576,6.541,7.568,7.8262,8.0249,7.6977,7.3312,5.9941,,,,5.2515,,
priceToFreeCashFlowsRatio,0.0,0.0,26.0692,34.2863,0.0,0.0,0.0,0.0,0.0,,,,0.0,,
priceToOperatingCashFlowsRatio,6.7495,6.7495,6.7495,6.7495,6.7495,6.7495,6.7495,6.7495,6.7495,,,,6.7495,,
priceCashFlowRatio,0,0,0,0,0,6.7538609559868,4.5163008895987,6.0244745314653,7.6764844359461,,,,4.8519068497326,,
priceEarningsToGrowthRatio,0,0,0,0,0,15.51185673913,,131.0646412017,12.717140829901,,,,8.5910992436515,,
priceSalesRatio,0,0,0,0,0,2.8023383799706,1.7439851228751,2.433893472543,2.9852043720721,,,,2.3250419913735,,


In [77]:
# Write the profile frame to the raw data directory; the file name is the
# '.'-joined ticker list followed by '.profile.csv'.
profile_csv_path = '../data/raw/' + '.'.join(tickers) + '.profile.csv'
df_profile.reset_index().to_csv(profile_csv_path)

In [78]:
# Write the merged frame to the raw data directory; the file name is the
# '.'-joined ticker list followed by '.csv'.
aggregate_csv_path = '../data/raw/' + '.'.join(tickers) + '.csv'
df_agg.reset_index().to_csv(aggregate_csv_path)