In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import pickle

In [5]:
# Load the two pickled datasets used throughout the notebook.
# Context managers guarantee the file handles are closed even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that come from a trusted source.
with open('./data/fundamentals2.pkl', 'rb') as fundamentals_file:
    x = pickle.load(fundamentals_file)
with open('./data/performance2.pkl', 'rb') as performance_file:
    y = pickle.load(performance_file)

In [6]:
# Show the (rows, columns) shape of each loaded object, one per line.
print(x.shape, y.shape, sep='\n')

(27105, 74)
(27105, 8)


In [7]:
# Summarize each column's dtype and non-null count to spot missing data.
x.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27105 entries, 0 to 27104
Data columns (total 74 columns):
 #   Column                                           Non-Null Count  Dtype         
---  ------                                           --------------  -----         
 0   Ticker                                           27105 non-null  object        
 1   Report Date                                      27105 non-null  datetime64[ns]
 2   SimFinId                                         27105 non-null  int64         
 3   Currency                                         27105 non-null  object        
 4   Fiscal Year                                      27105 non-null  int64         
 5   Fiscal Period_x                                  27105 non-null  object        
 6   Publish Date                                     27105 non-null  datetime64[ns]
 7   Restated Date_x                                  27105 non-null  datetime64[ns]
 8   Shares (Basic)_x                    

Some columns still contain null values. We need to handle these before feature engineering, because any ratio engineered from them could give us an error. Let's fill these null values with zero first.

In [9]:
# Replace every missing value in the frame with 0.
# NOTE(review): zero-filled columns that are later used as denominators will
# yield inf/NaN ratios instead of raising — confirm 0 is the right fill value.
x = x.fillna(0)

In [12]:
# Re-inspect the frame after the fill to confirm the non-null counts.
x.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27105 entries, 0 to 27104
Data columns (total 74 columns):
 #   Column                                           Non-Null Count  Dtype         
---  ------                                           --------------  -----         
 0   Ticker                                           27105 non-null  object        
 1   Report Date                                      27105 non-null  datetime64[ns]
 2   SimFinId                                         27105 non-null  int64         
 3   Currency                                         27105 non-null  object        
 4   Fiscal Year                                      27105 non-null  int64         
 5   Fiscal Period_x                                  27105 non-null  object        
 6   Publish Date                                     27105 non-null  datetime64[ns]
 7   Restated Date_x                                  27105 non-null  datetime64[ns]
 8   Shares (Basic)_x                    

In [14]:
# Add three derived columns to x:
#   Market Cap = diluted share count x opening price (taken from y)
#   EV         = Market Cap + long-term debt + short-term debt - cash
#   EBIT       = Net Income - net interest expense - net income tax
# NOTE(review): EBIT subtracts the interest and tax columns; presumably expenses
# are stored as negative numbers so this adds them back — confirm against the data.

diluted_shares = x['Shares (Diluted)_x']
open_price = y['Open1']
x['Market Cap'] = diluted_shares * open_price

cash_and_equivalents = x['Cash, Cash Equivalents & Short Term Investments']
x['EV'] = x['Market Cap'] + x['Long Term Debt'] + x['Short Term Debt'] - cash_and_equivalents

x['EBIT'] = (
    x['Net Income']
    - x['Interest Expense, Net']
    - x['Income Tax (Expense) Benefit, Net']
)

In [15]:
x.head()

Unnamed: 0,Ticker,Report Date,SimFinId,Currency,Fiscal Year,Fiscal Period_x,Publish Date,Restated Date_x,Shares (Basic)_x,Shares (Diluted)_x,...,Net Cash from Acquisitions & Divestitures,Net Cash from Investing Activities,Dividends Paid,Cash from (Repayment of) Debt,Cash from (Repurchase of) Equity,Net Cash from Financing Activities,Net Change in Cash,Market Cap,EV,EBIT
0,A,2016-10-31,45846,USD,2016,Q4,2016-12-20,2018-12-20,324000000.0,328000000.0,...,-26000000.0,-78000000.0,-38000000.0,27000000.0,-43000000.0,-56000000.0,90000000,14212240000.0,13827240000.0,167000000.0
1,A,2017-01-31,45846,USD,2017,Q1,2017-03-08,2018-03-06,322000000.0,326000000.0,...,-69000000.0,-101000000.0,-42000000.0,89000000.0,-93000000.0,-58000000.0,-48000000,15661040000.0,15412040000.0,209000000.0
2,A,2017-04-30,45846,USD,2017,Q2,2017-06-06,2018-05-31,321000000.0,325000000.0,...,0.0,-43000000.0,-43000000.0,52000000.0,-75000000.0,-67000000.0,148000000,18037500000.0,17691500000.0,206000000.0
3,A,2017-07-31,45846,USD,2017,Q3,2017-09-06,2018-08-30,321000000.0,326000000.0,...,-57000000.0,-101000000.0,-42000000.0,39000000.0,32000000.0,29000000.0,174000000,19664320000.0,19182320000.0,206000000.0
4,A,2017-10-31,45846,USD,2017,Q4,2017-12-21,2018-12-20,324000000.0,327000000.0,...,0.0,-60000000.0,-43000000.0,-70000000.0,8000000.0,-106000000.0,115000000,22150980000.0,21483980000.0,239000000.0


In [23]:
# Feature engineer many different financial ratios.
#
# Missing values were filled with 0 earlier in the notebook, so many of the
# denominators below can be exactly zero. Plain `/` would then silently produce
# +/-inf (or NaN for 0/0). Zero denominators are mapped to NaN first so that
# every undefined ratio is consistently NaN and can be handled in one place.
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, with NaN wherever the denominator is 0."""
    return numerator / denominator.where(denominator != 0)


# EV/EBIT
x['EV_EBIT'] = _safe_ratio(x['EV'], x['EBIT'])

# Operating Income over (Net working capital + Fixed assets)
x['OI_NWCFA'] = _safe_ratio(
    x['Operating Income (Loss)'],
    x['Total Current Assets'] - x['Total Current Liabilities'] + x['Property, Plant & Equipment, Net'],
)

# Price to earnings
x['PE'] = _safe_ratio(x['Market Cap'], x['Net Income'])

# Price to book
x['PB'] = _safe_ratio(x['Market Cap'], x['Total Equity'])

# Price to sales
x['PS'] = _safe_ratio(x['Market Cap'], x['Revenue'])

# Operating Income over Interest Expense (the interest column is negated first)
x['OI_IntExp'] = _safe_ratio(x['Operating Income (Loss)'], -x['Interest Expense, Net'])

# Working Capital Ratio (CA/CL)
x['WorkingCap'] = _safe_ratio(x['Total Current Assets'], x['Total Current Liabilities'])

# Return on Equity
x['ROE'] = _safe_ratio(x['Net Income'], x['Total Equity'])

# Return on Capital Employed
x['ROCE'] = _safe_ratio(x['EBIT'], x['Total Assets'] - x['Total Current Liabilities'])

# Debt/Equity
x['DE'] = _safe_ratio(x['Total Liabilities'], x['Total Equity'])

# Debt Ratio
# NOTE(review): this is assets over liabilities, the reciprocal of the
# conventional debt ratio (liabilities / assets). Left unchanged to preserve
# existing feature values — confirm the intended definition.
x['DebtRatio'] = _safe_ratio(x['Total Assets'], x['Total Liabilities'])

# Cash Ratio
x['CashRatio'] = _safe_ratio(
    x['Cash, Cash Equivalents & Short Term Investments'],
    x['Total Current Liabilities'],
)

# Asset Turnover
# NOTE(review): the denominator is net PP&E (a fixed-asset turnover), not
# total assets — confirm the intended definition.
x['AssetTurnover'] = _safe_ratio(x['Revenue'], x['Property, Plant & Equipment, Net'])

# Gross Profit Margin
x['GPM'] = _safe_ratio(x['Gross Profit'], x['Revenue'])

# Working Capital over total assets
x['WC_TA'] = _safe_ratio(x['Total Current Assets'] - x['Total Current Liabilities'], x['Total Assets'])

# Retained earnings over total assets
x['RE_TA'] = _safe_ratio(x['Retained Earnings'], x['Total Assets'])

# EBIT over total assets
x['EBIT_TA'] = _safe_ratio(x['EBIT'], x['Total Assets'])

# Equity over liabilities
x['Equity_Liab'] = _safe_ratio(x['Total Equity'], x['Total Liabilities'])

In [24]:
x.head()

Unnamed: 0,Ticker,Report Date,SimFinId,Currency,Fiscal Year,Fiscal Period_x,Publish Date,Restated Date_x,Shares (Basic)_x,Shares (Diluted)_x,...,ROCE,DE,DebtRatio,CashRatio,AssetTurnover,GPM,WC_TA,RE_TA,EBIT_TA,Equity_Liab
0,A,2016-10-31,45846,USD,2016,Q4,2016-12-20,2018-12-20,324000000.0,328000000.0,...,0.024383,0.83561,2.196731,2.422222,1.738654,0.529253,0.345137,0.781242,0.021427,1.196731
1,A,2017-01-31,45846,USD,2017,Q1,2017-03-08,2018-03-06,322000000.0,326000000.0,...,0.030812,0.830698,2.203807,2.057851,1.633997,0.537957,0.323425,-0.057546,0.02655,1.203807
2,A,2017-04-30,45846,USD,2017,Q2,2017-06-06,2018-05-31,321000000.0,325000000.0,...,0.030165,0.832229,2.201593,2.012637,1.632593,0.537205,0.325973,-0.049027,0.025699,1.201593
3,A,2017-07-31,45846,USD,2017,Q3,2017-09-06,2018-08-30,321000000.0,326000000.0,...,0.029345,0.791585,2.263288,2.06527,1.555866,0.535009,0.333495,-0.031473,0.024936,1.263288
4,A,2017-10-31,45846,USD,2017,Q4,2017-12-21,2018-12-20,324000000.0,327000000.0,...,0.033366,0.742709,2.346422,2.120348,1.570674,0.544155,0.344885,-0.014954,0.028365,1.346422
