### Import libraries

In [2]:
import pandas as pd
from pprint import pprint
import numpy as np
import json
from glob import glob
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal

### Load JSON data for a ticker

In [3]:
ticker = "AAM"

IS_files = sorted(glob("jsons/IS/" + "*_{}.json".format(ticker)))
BS_files = sorted(glob("jsons/BS/" + "*_{}.json".format(ticker)))
CF_files = sorted(glob("jsons/CF/" + "*_{}.json".format(ticker)))

### Name of each account

In [4]:
IS_ACCOUNTS = ["Net sales","Cost of goods sold","Gross Profit","Financial expenses","Of which: Interest expense",
            "Cost of sales","Enterprise cost management","Total Operating Expenses","Total revenue financing activities",
            "Net profit from business activities","Profit","Profit before tax","Present corporate income tax expenses",
            "Deferred income taxes expenses","The interests of minority shareholders","Total Cost of profits",
            "Profit after tax corporate income","Volume","Close of Quarter","EPS","Non-adjusted EPS","PE","Book Price"]

ASSETS = ["SHORT-TERM ASSETS","Cash and cash equivalents","Cash","Cash equivalents","Short-term financial investments",
          "Available for sale securities","Provision for diminution in value of available for sale securities (*)",
          "Held to maturity investments","Short-term receivables","Short-term trade accounts receivable",
          "Short-term prepayments to suppliers","Short-term inter-company receivables","Construction contract progress receipts due from customers",
          "Short-term loan receivables","Other short-term receivables","Provision for short-term doubtful debts (*)",
          "Assets awaiting resolution","Inventories","Inventories","Provision for decline in value of inventories",
          "Other short-term assets","Short-term prepayments","Value added tax to be reclaimed",
          "Taxes and other receivables from state authorities","Government bonds","Other short-term assets","LONG-TERM ASSETS",
          "Long-term receivables","Long-term trade receivables","Long-term prepayments to suppliers","Capital at inter-company",
          "Long-term inter-company receivables","Long-term loan receivables","Other long-term receivables",
          "Provision for long-term doubtful debts","Fixed assets","Tangible fixed assets","Cost","Accumulated depreciation",
          "Financial leased fixed assets","Cost","Accumulated depreciation","Intangible fixed assets","Cost",
          "Accumulated depreciation","Investment properties","Cost","Accumulated depreciation","Long-term assets in progress",
          "Long-term production in progress","Construction in progress","Long-term financial investments",
          "Investments in subsidiaries","Investments in associates, joint-ventures","Investments in other entities",
          "Provision for diminution in value of long-term investments","Held to maturity investments",
          "Other long-term investments","Other long-term assets","Long-term prepayments","Deferred income tax assets",
          "Long-term equipment, supplies, spare parts","Other long-term assets","Goodwill","TOTAL ASSETS"]

LIABILITIES = ["LIABILITIES","Short -term liabilities","Short-term trade accounts payable","Short-term advances from customers",
               "Taxes and other payables to state authorities","Payable to employees","Short-term acrrued expenses",
               "Short-term inter-company payables","Construction contract progress payments due to suppliers",
               "Short-term unearned revenue","Other short-term payables","Short-term borrowings and financial leases",
               "Provision for short-term liabilities","Bonus and welfare fund","Price stabilization fund",
               "Government bonds","Long-term liabilities","Long-term trade payables","Long-term advances from customers",
               "Long-term acrrued expenses","Inter-company payables on business capital","Long-term inter-company payables",
               "Long-term unearned revenue","Other long-term liabilities","Long-term borrowings and financial leases",
               "Convertible bonds","Preferred stock (Debts)","Deferred income tax liabilities","Provision for long-term liabilities",
               "Fund for technology development","Provision for severance allowances"]

EQUITY = ["OWNER'S EQUITY","Owner's equity","Owner's capital","Common stock with voting right","Preferred stock","Share premium",
          "Convertible bond option","Other capital of owners","Treasury shares","Assets revaluation differences",
          "Foreign exchange differences","Investment and development fund","Fund to support corporate restructuring",
          "Other funds from owner's equity","Undistributed earnings after tax","Accumulated retained earning at the end of the previous period",
          "Undistributed earnings in this period","Reserves for investment in construction","Minority's interest",
          "Financial reserves","Other resources and funds","Subsidized not-for-profit funds","Funds invested in fixed assets",
          "MINORITY'S INTEREST","TOTAL OWNER'S EQUITY AND LIABILITIES"]

CF_indirect = ['net_profit_before_tax','adjustments','depreciation_amortization','provisions','net_profit_from_investment_in_joint_venture',
                    'write_off_fixed_assets','unrealised_foreign_exchange_profit','profit_from_disposals_of_fixed_assets',
                    'profit_from_investing_activities','profit_from_deposit','interest_income','interest_expense',
                    'payments_direct_from_profit','operating_profit_before_working_capital_changes','increase_decrease_in_receivables',
                    'increase_decrease_in_inventories','increase_decrease_in_payables','increase_decrease_in_prepaid_expense',
                    'increase_decrease_in_current_assets','cash_paid_for_interest','cash_paid_for_taxes',
                    'other_cash_from_operating_activities','other_payments_from_operating_activities','net_cash_from_operating',
                    'cash_paid_for_new_PPE','cash_collected_from_PPE_sales','cash_paid_for_loans','cash_collected_from_loans',
                    'investment_in_joint_venture','purchases_of_short_term_investment','cash_paid_for_investments_in_other_companies',
                    'cash_collected_from_investments_in_other_companies','interest_collected_from_deposits',
                    'cash_collected_from_interest','purchases_of_minority equity','net_cash_from_investing',
                    'cash_collected_from_issuing_shares','cash_paid_for_capital_contribution','cash_paid_for_short_term_borrowing',
                    'cash_paid_for_principles','cash_paid_for_financial_lease','other_cash_paid_for_financial_activities',
                    'purchase_from_capitalization_issues','dividends_paid','minority_equity_in_joint_venture',
                    'social_welfare_expenses','net_cash_from_financing','net_cash_flow','cash_cash_equivalent_begin',
                    'effects_of_exchange_rate','cash_cash_equivalent_end']

CF_direct = ['cash_collected_from_customers','cash_paid_for_suppliers','cash_paid_for_employees','cash_paid_for_interest',
                  'cash_paid_for_taxes','cash_paid_for_VAT','other_cash_collected_for_operating_activities',
                  'other_cash_paid_for_operating_activities','net_cash_from_operating','cash_paid_for_new_PPE',
                  'cash_collected_from_PPE_sales','cash_paid_for_loans','cash_collected_from_loans',
                  'cash_paid_for_investments_in_other_companies','cash_collected_from_investments_in_other_companies',
                  'cash_collected_from_interest','net_cash_from_investing','cash_collected_from_issuing_shares',
                  'cash_paid_for_capital_contribution','cash_paid_for_short_term_borrowing','cash_paid_for_principles',
                  'cash_paid_for_PPE_lease','cash_paid_for_financial_lease','dividends_paid','cash_paid_for_firm_funds',
                  'net_cash_from_financing','net_cash_flow','cash_cash_equivalent_begin','effects_of_exchange_rate',
                  'cash_cash_equivalent_end']

### Algorithms for processing NaNs

In [5]:
# GET DATAPOINT FROM AN ACCOUNT, TREATING 'N/A' AS np.nan
def process_array(row, status, account):
    if row[status][account] == 'N/A':
        return np.nan
    else:
        return row[status][account]
    
# SUM ACROSS COLUMNS. WHEN ALL COMPONENTS ARE NaN, THE SUM IS NaN    
def sum_across_cols(df):
    result = df.isnull().sum(axis=1)
    result.reindex(df.index, copy=False)
    indexes = np.nonzero(result == df.shape[1])    
    temp = df.fillna(0)
    result = df.sum(skipna=True,axis=1)
    result.iloc[indexes] = np.nan    
    return result

### Read data into DataFrames

In [6]:
for file in IS_files:
    IS_df = pd.read_json(file)
    IS_df['index'] = np.array([row['quarter'] for row in IS_df['data']])
    IS_df.set_index('index', inplace=True)
for file in BS_files:
    BS_df = pd.read_json(file)
    BS_df['index'] = np.array([row['quarter'] for row in BS_df['data']])
    BS_df.set_index('index', inplace=True)
for file in CF_files:
    CF_df = pd.read_json(file)
    CF_df['index'] = np.array([row['quarter'] for row in CF_df['data']])
    CF_df.set_index('index', inplace=True)

# Income statement
for account in IS_ACCOUNTS:
    array = np.array([ process_array(row, 'income status', account) for row in IS_df['data'] ])
    IS_df[account] = array
IS_df.drop('data', 1, inplace=True)

# Balance sheet
for account in ASSETS:
    array = np.array([ process_array(row, 'assets', account) for row in BS_df['data'] ])
    BS_df[account] = array
#     BS_df[account] = (array - np.nanmean(array)) / np.nanstd(array)
for account in LIABILITIES:
    array = np.array([ process_array(row, 'liabilities', account) for row in BS_df['data'] ])
    BS_df[account] = array
#     BS_df[account] = (array - np.nanmean(array)) / np.nanstd(array)
for account in EQUITY:
    array = np.array([ process_array(row, 'equity', account) for row in BS_df['data'] ])
    BS_df[account] = array
#     BS_df[account] = (array - np.nanmean(array)) / np.nanstd(array)
BS_df.drop('data', 1, inplace=True)

# Cash flow statement
if CF_df['type'][0] == "indirect":
    for account in CF_indirect:
        array = np.array([ process_array(row, 'cash_flow_status', account) for row in CF_df['data'] ])
        CF_df[account] = array

    CF_df['net_cash_from_operating'].fillna(sum_across_cols(CF_df[['operating_profit_before_working_capital_changes',
                                                                      'increase_decrease_in_receivables',
                                                                      'increase_decrease_in_inventories',
                                                                      'increase_decrease_in_payables',
                                                                      'increase_decrease_in_prepaid_expense',
                                                                      'increase_decrease_in_current_assets',
                                                                      'cash_paid_for_interest',
                                                                      'cash_paid_for_taxes',
                                                                      'other_cash_from_operating_activities',
                                                                      'other_payments_from_operating_activities']]),inplace=True)
    CF_df['net_cash_from_investing'].fillna(sum_across_cols(CF_df[['cash_paid_for_new_PPE',
                                                                    'cash_collected_from_PPE_sales',
                                                                    'cash_paid_for_loans',
                                                                    'cash_collected_from_loans',
                                                                    'investment_in_joint_venture',
                                                                    'purchases_of_short_term_investment',
                                                                    'cash_paid_for_investments_in_other_companies',
                                                                    'cash_collected_from_investments_in_other_companies',
                                                                    'interest_collected_from_deposits',
                                                                    'cash_collected_from_interest',
                                                                    'purchases_of_minority equity']]))
    CF_df['net_cash_from_financing'].fillna(sum_across_cols(CF_df[['cash_collected_from_issuing_shares',
                                                                    'cash_paid_for_capital_contribution',
                                                                    'cash_paid_for_short_term_borrowing',
                                                                    'cash_paid_for_principles',
                                                                    'cash_paid_for_financial_lease',
                                                                    'other_cash_paid_for_financial_activities',
                                                                    'purchase_from_capitalization_issues',
                                                                    'dividends_paid',
                                                                    'minority_equity_in_joint_venture',
                                                                    'social_welfare_expenses']]))
else:
    for account in CF_direct:
        array = np.array([ process_array(row, 'cash_flow_status', account) for row in CF_df['data'] ])
        CF_df[account] = array
        
    CF_df['net_cash_from_operating'].fillna(sum_across_cols(CF_df[['cash_collected_from_customers',
                                                                  'cash_paid_for_suppliers',
                                                                  'cash_paid_for_employees',
                                                                  'cash_paid_for_interest',
                                                                  'cash_paid_for_taxes',
                                                                  'cash_paid_for_VAT',
                                                                  'other_cash_collected_for_operating_activities',
                                                                  'other_cash_paid_for_operating_activities']]))
    CF_df['net_cash_from_investing'].fillna(sum_across_cols(CF_df[['cash_paid_for_new_PPE',
                                                                      'cash_collected_from_PPE_sales',
                                                                      'cash_paid_for_loans',
                                                                      'cash_collected_from_loans',
                                                                      'cash_paid_for_investments_in_other_companies',
                                                                      'cash_collected_from_investments_in_other_companies',
                                                                      'cash_collected_from_interest']]))
    CF_df['net_cash_from_financing'].fillna(sum_across_cols(CF_df[['cash_collected_from_issuing_shares',
                                                                      'cash_paid_for_capital_contribution',
                                                                      'cash_paid_for_short_term_borrowing',
                                                                      'cash_paid_for_principles',
                                                                      'cash_paid_for_PPE_lease',
                                                                      'cash_paid_for_financial_lease',
                                                                      'dividends_paid',
                                                                      'cash_paid_for_firm_funds']]))
        
CF_df['net_cash_flow'].fillna(sum_across_cols(CF_df[['net_cash_from_operating', 'net_cash_from_investing', 'net_cash_from_financing']]))
CF_df.drop('data', 1, inplace=True)

### Combines all 3 DataFrames

In [7]:
IS_CF = pd.merge(IS_df, CF_df, left_index=True, right_index=True, how='outer')
IS_CF_BS = pd.merge(IS_CF, BS_df, left_index=True, right_index=True, how='outer')

# Reindex
reindex = sorted(IS_CF_BS.index, key = lambda x: int(x.split(" ")[1]))
IS_CF_BS_reindexed = IS_CF_BS.reindex(reindex)
print (reindex)
# IS_CF_BS_reindexed.index

['Q4 2011', 'Q3 2011', 'Q2 2011', 'Q1 2011', 'Q4 2012', 'Q3 2012', 'Q2 2012', 'Q1 2012', 'Q4 2013', 'Q3 2013', 'Q2 2013', 'Q1 2013', 'Q4 2014', 'Q3 2014', 'Q2 2014', 'Q1 2014', 'Q4 2015', 'Q3 2015', 'Q2 2015', 'Q1 2015', 'Q4 2016', 'Q3 2016', 'Q2 2016', 'Q1 2016']


### Calculate financial indexes

In [8]:
IS_CF_BS_reindexed['current_ratio'] = IS_CF_BS_reindexed['SHORT-TERM ASSETS'] / IS_CF_BS_reindexed['Short -term liabilities']
IS_CF_BS_reindexed['quick_ratio'] = sum_across_cols(IS_CF_BS_reindexed[['Cash and cash equivalents', 'Short-term receivables',  'Short-term trade accounts receivable', 'Short-term inter-company receivables', 'Short-term loan receivables', 'Other short-term receivables', 'Taxes and other receivables from state authorities']]) / IS_CF_BS_reindexed['Short -term liabilities']
IS_CF_BS_reindexed['cash_ratio'] = IS_CF_BS_reindexed['Cash and cash equivalents'] / IS_CF_BS_reindexed['Short -term liabilities']
IS_CF_BS_reindexed['long_term_debt_to_equity'] = IS_CF_BS_reindexed['Long-term liabilities'] / (IS_CF_BS_reindexed['TOTAL OWNER\'S EQUITY AND LIABILITIES'] - IS_CF_BS_reindexed['LIABILITIES'])
IS_CF_BS_reindexed['total_debt_to_equity'] = IS_CF_BS_reindexed['LIABILITIES'] / (IS_CF_BS_reindexed['TOTAL OWNER\'S EQUITY AND LIABILITIES'] - IS_CF_BS_reindexed['LIABILITIES'])
IS_CF_BS_reindexed['debt_ratio'] = IS_CF_BS_reindexed['LIABILITIES'] / IS_CF_BS_reindexed['TOTAL ASSETS']
IS_CF_BS_reindexed['financial_leverage'] = IS_CF_BS_reindexed['TOTAL ASSETS'] / (IS_CF_BS_reindexed['TOTAL OWNER\'S EQUITY AND LIABILITIES'] - IS_CF_BS_reindexed['LIABILITIES'])
IS_CF_BS_reindexed['interest_coverage'] = sum_across_cols(IS_CF_BS_reindexed[['Profit before tax', 'Of which: Interest expense']]) / IS_CF_BS_reindexed['Of which: Interest expense']
IS_CF_BS_reindexed['net_profit_margin'] = IS_CF_BS_reindexed['Profit after tax corporate income'] / IS_CF_BS_reindexed['Net sales']
IS_CF_BS_reindexed['operating_profit_margin'] = sum_across_cols(IS_CF_BS_reindexed[['Profit before tax', 'Of which: Interest expense']]) / IS_CF_BS_reindexed['Net sales']
IS_CF_BS_reindexed['gross_profit_margin'] = IS_CF_BS_reindexed['Gross Profit'] / IS_CF_BS_reindexed['Net sales']
IS_CF_BS_reindexed['pretax_margin'] = IS_CF_BS_reindexed['Profit before tax'] / IS_CF_BS_reindexed['Net sales']

IS_CF_BS_reindexed['average_total_assets'] = IS_CF_BS_reindexed['TOTAL ASSETS'].rolling(2, min_periods=1).mean()
IS_CF_BS_reindexed['ROA'] = IS_CF_BS_reindexed['Profit after tax corporate income'] / IS_CF_BS_reindexed['average_total_assets']
IS_CF_BS_reindexed['operating_ROA'] = sum_across_cols(IS_CF_BS_reindexed[['Profit before tax', 'Of which: Interest expense']]) / IS_CF_BS_reindexed['average_total_assets']

IS_CF_BS_reindexed['average_total_capital'] = IS_CF_BS_reindexed['Owner\'s capital'].rolling(2, min_periods=1).mean()
IS_CF_BS_reindexed['ROC'] = sum_across_cols(IS_CF_BS_reindexed[['Profit before tax', 'Of which: Interest expense']]) / IS_CF_BS_reindexed['average_total_capital']

IS_CF_BS_reindexed['average_total_equity'] = (IS_CF_BS_reindexed['TOTAL OWNER\'S EQUITY AND LIABILITIES'] - IS_CF_BS_reindexed['LIABILITIES']).rolling(2, min_periods=1).mean()
IS_CF_BS_reindexed['ROE'] = IS_CF_BS_reindexed['Profit after tax corporate income'] / IS_CF_BS_reindexed['average_total_equity']

IS_CF_BS_reindexed['average_receivables'] = sum_across_cols(IS_CF_BS_reindexed[['Short-term receivables', 'Short-term trade accounts receivable']]).rolling(2, min_periods=1).mean()
IS_CF_BS_reindexed['receivables_turnover'] = IS_CF_BS_reindexed['Net sales'] / IS_CF_BS_reindexed['average_receivables']

IS_CF_BS_reindexed['average_inventory'] = sum_across_cols(IS_CF_BS_reindexed[['Inventories', 'Provision for decline in value of inventories']]).rolling(2, min_periods=1).mean()
IS_CF_BS_reindexed['inventory_turnover'] = IS_CF_BS_reindexed['Cost of goods sold'] / IS_CF_BS_reindexed['average_inventory']

IS_CF_BS_reindexed['average_payables'] = IS_CF_BS_reindexed['Short-term trade accounts payable'].rolling(window=2, min_periods=1).mean()
IS_CF_BS_reindexed['payables_turnover'] = (sum_across_cols(IS_CF_BS_reindexed[['Inventories', 'Provision for decline in value of inventories']]).diff(1).fillna(0) + IS_CF_BS_reindexed['Cost of goods sold']) / IS_CF_BS_reindexed['average_payables']

IS_CF_BS_reindexed['book_value_per_share'] = (IS_CF_BS_reindexed['TOTAL OWNER\'S EQUITY AND LIABILITIES'] - IS_CF_BS_reindexed['LIABILITIES']) / IS_CF_BS_reindexed['Volume']
IS_CF_BS_reindexed['price_earnings'] = IS_CF_BS_reindexed['PE']
IS_CF_BS_reindexed['book_price'] = IS_CF_BS_reindexed['Book Price']
IS_CF_BS_reindexed['cash_ROA'] = IS_CF_BS_reindexed['net_cash_from_operating'] / IS_CF_BS_reindexed['average_total_assets']
IS_CF_BS_reindexed['cash_flow_on_revenue'] = IS_CF_BS_reindexed['net_cash_from_operating'] / IS_CF_BS_reindexed['Net sales']
IS_CF_BS_reindexed['cash_ROE'] = IS_CF_BS_reindexed['net_cash_from_operating'] / IS_CF_BS_reindexed['average_total_equity']
IS_CF_BS_reindexed['cash_on_income'] = IS_CF_BS_reindexed['net_cash_from_operating'] / sum_across_cols(IS_CF_BS_reindexed[['Profit before tax', 'Of which: Interest expense']])
IS_CF_BS_reindexed['debt_coverage'] = IS_CF_BS_reindexed['net_cash_from_operating'] / IS_CF_BS_reindexed['LIABILITIES']
IS_CF_BS_reindexed['cash_interest_coverage'] = sum_across_cols(IS_CF_BS_reindexed[['net_cash_from_operating', 'cash_paid_for_interest', 'cash_paid_for_taxes']]) / IS_CF_BS_reindexed['cash_paid_for_interest']
IS_CF_BS_reindexed['reinvestment'] = IS_CF_BS_reindexed['net_cash_from_operating'] / IS_CF_BS_reindexed['cash_paid_for_new_PPE']
IS_CF_BS_reindexed['total_net_accruals'] = IS_CF_BS_reindexed['TOTAL ASSETS'].diff(1).fillna(0) - IS_CF_BS_reindexed['LIABILITIES'].diff(1).fillna(0) - IS_CF_BS_reindexed['net_cash_flow']
IS_CF_BS_reindexed['cash_earnings'] = IS_CF_BS_reindexed['total_net_accruals'] - IS_CF_BS_reindexed['Profit after tax corporate income']

In [9]:
IS_CF_BS_reindexed
# print ((IS_CF_BS_reindexed[['net_cash_from_operating', 'cash_paid_for_interest', 'cash_paid_for_taxes']]).sum(axis=1, skipna=False))
# print (sum_across_cols(IS_CF_BS_reindexed[['net_cash_from_operating', 'cash_paid_for_interest', 'cash_paid_for_taxes']]))

Unnamed: 0_level_0,ticker_x,Net sales,Cost of goods sold,Gross Profit,Financial expenses,Of which: Interest expense,Cost of sales,Enterprise cost management,Total Operating Expenses,Total revenue financing activities,...,book_price,cash_ROA,cash_flow_on_revenue,cash_ROE,cash_on_income,debt_coverage,cash_interest_coverage,reinvestment,total_net_accruals,cash_earnings
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Q4 2011,AAM,150945,133971,16973,1333,276.0,4449,3994,9776,2394,...,27.4,-0.047483,-0.103369,-0.0545,-0.198863,-0.368795,58.383212,3.30572,19665.0,7516.0
Q3 2011,AAM,170608,133868,36740,897,239.0,13665,3342,17904,3798,...,26.9,0.005653,0.011383,0.006847,0.084567,0.025028,19.799163,-3.629907,8855.0,-8185.0
Q2 2011,AAM,191880,161272,30608,2081,273.0,7492,3689,13262,8403,...,27.1,0.052859,0.095461,0.064979,0.700941,0.354465,-48.361789,-20.814773,-1137.0,-20437.0
Q1 2011,AAM,125864,97731,28133,829,226.0,15394,3184,19407,11198,...,27.2,0.077747,0.20422,0.090693,1.278806,0.60184,-96.2607,,-2998.0,-17924.0
Q4 2012,AAM,135695,117093,18602,2706,63.0,12487,2730,17923,1451,...,24.4,1.059255,2.388879,1.201911,126.971798,10.828401,190.258611,,-31382.0,-33310.0
Q3 2012,AAM,97253,89070,8183,37,20.0,5389,2317,7743,1363,...,25.3,-0.034645,-0.102763,-0.038494,-5.315957,-0.360157,573.7,6.146371,32119.0,30666.0
Q2 2012,AAM,132742,119058,13684,-342,77.0,9614,3414,12686,2904,...,26.5,0.125814,0.282623,0.139292,9.066216,1.25237,-313.867925,-3.127637,-2106.0,-5914.0
Q1 2012,AAM,119878,107541,12337,834,179.0,4502,3330,8666,1985,...,27.2,0.003459,0.009051,0.003889,0.113233,0.027478,-10.044693,0.120676,-6996.0,-14044.0
Q4 2013,AAM,153514,137591,15923,113,64.0,9202,3953,13268,1783,...,24.8,0.070891,0.14426,0.081696,4.802863,0.513257,-414.86,-381.827586,-50215.0,-54253.0
Q3 2013,AAM,148790,133827,14963,328,76.0,10456,2557,13341,851,...,24.5,0.132334,0.263808,0.152493,14.71766,1.112774,-515.473684,-373.828571,-29255.0,-31194.0


In [10]:
bg = IS_CF_BS_reindexed.columns.get_loc('current_ratio')
end = IS_CF_BS_reindexed.columns.get_loc('cash_earnings')+1
print (bg, end)

indexes_df = IS_CF_BS_reindexed.iloc[:, bg:end]
indexes_df

# for col in ['average_total_assets', 'average_total_capital', 'average_total_equity', 'average_receivables', 'average_inventory', 'average_payables']:
#     indexes_df.drop(col, axis=1, inplace=True)

188 225


Unnamed: 0_level_0,current_ratio,quick_ratio,cash_ratio,long_term_debt_to_equity,total_debt_to_equity,debt_ratio,financial_leverage,interest_coverage,net_profit_margin,operating_profit_margin,...,book_price,cash_ROA,cash_flow_on_revenue,cash_ROE,cash_on_income,debt_coverage,cash_interest_coverage,reinvestment,total_net_accruals,cash_earnings
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Q4 2011,7.522194,7.291321,1.493083,0.031632,0.147778,0.128751,1.147778,284.278986,0.080486,0.519799,...,27.4,-0.047483,-0.103369,-0.0545,-0.198863,-0.368795,58.383212,3.30572,19665.0,7516.0
Q3 2011,4.116112,4.440459,1.022263,0.030457,0.276208,0.216429,1.276208,96.083682,0.099878,0.134601,...,26.9,0.005653,0.011383,0.006847,0.084567,0.025028,19.799163,-3.629907,8855.0,-8185.0
Q2 2011,5.90631,6.774122,1.944465,0.028506,0.182689,0.15447,1.182689,95.721612,0.100584,0.136189,...,27.1,0.052859,0.095461,0.064979,0.700941,0.354465,-48.361789,-20.814773,-1137.0,-20437.0
Q1 2011,6.966011,7.741236,2.260871,0.025033,0.150394,0.130732,1.150394,88.938053,0.118588,0.159696,...,27.2,0.077747,0.20422,0.090693,1.278806,0.60184,-96.2607,,-2998.0,-17924.0
Q4 2012,8.200925,5.31566,1.599642,0.012274,0.117201,0.104906,1.117201,40.52381,0.014208,0.018814,...,24.4,1.059255,2.388879,1.201911,126.971798,10.828401,190.258611,,-31382.0,-33310.0
Q3 2012,9.146402,6.619876,2.214612,0.012156,0.105181,0.095171,1.105181,94.0,0.01494,0.019331,...,25.3,-0.034645,-0.102763,-0.038494,-5.315957,-0.360157,573.7,6.146371,32119.0,30666.0
Q2 2012,8.925335,7.838806,2.950047,0.012702,0.108992,0.09828,1.108992,53.74026,0.028687,0.031173,...,26.5,0.125814,0.282623,0.139292,9.066216,1.25237,-313.867925,-3.127637,-2106.0,-5914.0
Q1 2012,7.747093,7.334731,1.98724,0.024037,0.139455,0.122387,1.139455,53.530726,0.058793,0.079931,...,27.2,0.003459,0.009051,0.003889,0.113233,0.027478,-10.044693,0.120676,-6996.0,-14044.0
Q4 2013,5.866184,4.730595,1.490126,0.011942,0.166587,0.142799,1.166587,72.046875,0.026304,0.030036,...,24.8,0.070891,0.14426,0.081696,4.802863,0.513257,-414.86,-381.827586,-50215.0,-54253.0
Q3 2013,7.262766,5.380049,1.053589,0.012565,0.137901,0.121189,1.137901,35.092105,0.013032,0.017925,...,24.5,0.132334,0.263808,0.152493,14.71766,1.112774,-515.473684,-373.828571,-29255.0,-31194.0


In [21]:
#liquidity 0:3
#solvency 3:8
#profitability 8:16
#activity 16:19
#market 19:22
#cashflow 22:29
#accruals 29:31

indexes_df['liquidity'] = indexes_df[indexes_df.columns[0:3]].mean(axis=1)
indexes_df['solvency'] = indexes_df[indexes_df.columns[3:8]].mean(axis=1)
indexes_df['profitability'] = indexes_df[indexes_df.columns[8:16]].mean(axis=1)
indexes_df['activity'] = indexes_df[indexes_df.columns[16:19]].mean(axis=1)
indexes_df['market'] = indexes_df[indexes_df.columns[19:22]].mean(axis=1)
indexes_df['cash_flow'] = indexes_df[indexes_df.columns[22:29]].mean(axis=1)
indexes_df['accruals'] = indexes_df[indexes_df.columns[29:31]].mean(axis=1)

categories_avg_df = indexes_df[['liquidity', 'solvency', 'profitability', 'activity', 'market', 'cash_flow', 'accruals']]
categories_avg_df = (categories_avg_df - categories_avg_df.mean()) / categories_avg_df.std()
categories_avg_df.corr().abs().where(np.triu(categories_avg_df.corr().abs(), k=1).astype(np.bool))


# indexes_df[['payables_turnover', 'inventory_turnover', 'receivables_turnover', 'average_inventory']]
# IS_CF_BS_reindexed['Cost of goods sold'] / IS_CF_BS_reindexed[['Provision for decline in value of inventories','Inventories']].sum(axis=1, skipna=True).rolling(2, min_periods=1).mean()
# indexes_df[indexes_df.columns[16:19]]
# IS_CF_BS_reindexed[['average_receivables','Short-term receivables', 'Short-term trade accounts receivable']]

Unnamed: 0,liquidity,solvency,profitability,activity,market,cash_flow,accruals
liquidity,,0.025821,0.436153,0.079263,0.184235,0.430363,0.124375
solvency,,,0.269794,0.507226,0.261463,0.04851,0.141318
profitability,,,,0.681073,0.434991,0.604496,0.241905
activity,,,,,0.641862,0.181097,0.002643
market,,,,,,0.252992,0.369182
cash_flow,,,,,,,0.035071
accruals,,,,,,,


In [12]:
# indexes_df_normalized = (indexes_df - indexes_df.mean()) / indexes_df.std()
# corr_matrix1 = indexes_df_normalized.corr().abs()
# corr_matrix_diag1 = corr_matrix1.where(np.triu(np.ones(corr_matrix1.shape), k=1).astype(np.bool))
# sorted_corr_matrix_diag1 = corr_matrix_diag1.stack()
# corr_matrix_diag1