**Imports**

In [1]:
import os
import pickle
from collections import Counter, OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import FundamentalAnalysis as fa

# import_ipynb enables importing UTILS.ipynb as a module; keep it before the UTILS import.
import import_ipynb
import UTILS as utils

importing Jupyter notebook from UTILS.ipynb


In [2]:
# Lift pandas' display limits: None means "show every row / every column".
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [3]:
# Read the API key from the environment so the real key never has to be saved inside the notebook.
# When FA_API_KEY is not set, the original placeholder text is used, exactly as before.
api_key = os.environ.get("FA_API_KEY", "Here would go my personal API key")

------------

**Load data**

*Files with the data for the year with displacement -X*

In [None]:
# Input files for the chosen year displacement. The "X" in each path presumably stands for the
# displacement value -- TODO confirm and substitute it before running.
file1 = (
    "DATA_ttm/S_P500_data_ttm_Xyeardisp/"
    "S_P500_dataframe_with_ratios_ttm_Xyeardisp.csv"
)
file2 = (
    "DATA_ttm/S_P500_data_ttm_Xyeardisp/"
    "S_P500_companies_financials_data_ttm_Xyeardisp.pickle"
)

In [106]:
# Load both input files (file1: ratios CSV path, file2: company financials pickle path) through the
# shared UTILS helper. The result is indexed as [0] and [1] in the next cell.
data_loaded = utils.data_loading(file1, file2)

In [107]:
dict_companies = data_loaded[1]

# Two of the computed ratios are removed because they nearly duplicate other columns and would
# therefore give that information false (double) relevance:
#   "_asset_to_equity"              duplicates "_equity_multiplier"
#   "_total_debt_to_capitalization" duplicates "_total_debt_to_capital"
# drop() returns a new frame instead of mutating data_loaded[0] in place, so this cell can be
# re-run without raising KeyError on columns that were already removed.
df = data_loaded[0].drop(columns=["_asset_to_equity", "_total_debt_to_capitalization"])

In [108]:
# Preview the first two rows to check the loaded ratio columns.
df.head(2)

Unnamed: 0_level_0,_disp,_PAT_margin,_EBITDA_margin,_return_on_equity,_return_on_assets,_return_on_capital_employed,_gross_margin,_operating_margin,_operating_cash_flow_ratio,_return_on_invested_capital,_return_on_sales,_fixed_asset_turnover,_working_capital_turnover,_total_asset_turnover,_inventory_turnover,_days_of_inventory_on_hand,_accounts_receivables_turnover,_payables_turnover,_debt_to_assets,_interest_coverage,_debt_to_equity,_equity_multiplier,_total_debt_to_capital,_net_debt_to_EBITDA,_degree_financial_leverage,_earnings_per_share,_book_value_per_share,_cash_earnings_per_share,_price_to_earnings,_price_to_book_value,_price_to_sales,_price_to_free_cash_flow,_price_earnings_to_growth,_dividend_yield,_dividend_payout,_enterprise_value_to_ebitda,_enterprise_value_to_sales,_current_ratio,_quick_ratio,_cash_ratio,_days_of_sales_outstanding,_days_of_sales_in_inventory,_days_of_payables_outstanding,_operating_cycle,_cash_conversion_cycle
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1
MMM,2,0.153873,0.256938,0.120828,0.028945,0.048458,0.476497,0.207315,0.021889,0.048458,0.207315,0.81281,4.847958,0.18846,1.017196,88.557504,1.60539,1.96058,0.759778,14.497171,1.687505,4.208227,0.656201,7.634353,1.31035,2.155,17.76768,3.136049,19.699515,9.179421,11.621096,70.936366,0.246938,0.008968,0.692397,53.762023,3.098002,1.824179,1.151772,0.525355,56.133259,227.651552,45.956976,144.690763,98.733787
AOS,2,0.123555,0.187507,0.054431,0.029718,0.049049,0.394449,0.155319,0.246905,0.049049,0.155319,1.254168,3.63324,0.240488,1.443708,62.37685,1.200257,0.91841,0.453851,43.605517,0.179391,1.831239,0.154779,0.046576,1.137364,0.562114,10.325532,0.696915,21.94132,4.751008,10.801056,3270.928668,0.698811,0.004638,0.404647,57.884529,2.285111,2.112919,1.61128,0.407993,75.110708,304.61565,98.021482,137.487558,39.466077


----------------

**Add sectors and industries info to the dataset**

In [110]:
# Obtain sector and industry information for the companies in df through the shared UTILS helper.
# The two results are read below through their .sector and .industry attributes.
sectors_df, industries_df = utils.get_company_sectors_and_industries(df, dict_companies)

In [111]:
# Add the label columns at the front of df. Each column is guarded on its own: df.insert raises
# ValueError when the column already exists, which a combined check would trigger whenever only
# one of the two columns is present (e.g. after a partially executed run of this cell).
if "sector" not in df.columns:
    df.insert(0, "sector", sectors_df.sector)
if "industry" not in df.columns:
    df.insert(1, "industry", industries_df.industry)

-------------------

**Classify each ratio**

In [115]:
# Ratios for which a larger value is better ("the higher the value of the ratio, the better").
high_ratios = [
    '_PAT_margin',
    '_EBITDA_margin',
    '_return_on_equity',
    '_return_on_assets',
    '_return_on_capital_employed',
    '_gross_margin',
    '_operating_margin',
    '_operating_cash_flow_ratio',
    '_return_on_invested_capital',
    '_return_on_sales',
    '_fixed_asset_turnover',
    '_working_capital_turnover',
    '_total_asset_turnover',
    '_inventory_turnover',
    '_accounts_receivables_turnover',
    '_payables_turnover',
    '_interest_coverage',
    '_current_ratio',
    '_quick_ratio',
    '_cash_ratio',
]

In [116]:
# Sanity check: number of "higher is better" ratios.
len(high_ratios)

20

In [117]:
# Ratios for which a smaller value is better ("the lower the value of the ratio, the better").
low_ratios = [
    '_days_of_inventory_on_hand',
    '_debt_to_assets',
    '_debt_to_equity',
    '_equity_multiplier',
    '_total_debt_to_capital',
    '_net_debt_to_EBITDA',
    '_degree_financial_leverage',
    '_price_to_earnings',
    '_price_to_book_value',
    '_price_to_sales',
    '_price_to_free_cash_flow',
    '_price_earnings_to_growth',
    '_enterprise_value_to_ebitda',
    '_enterprise_value_to_sales',
    '_days_of_sales_outstanding',
    '_days_of_sales_in_inventory',
    '_operating_cycle',
]

In [118]:
# Sanity check: number of "lower is better" ratios.
len(low_ratios)

17

-------------------

**Define ranking functions**

In [120]:
def compute_ranking_by_ratio_and_ticker(df, high_ratios, low_ratios):
    """Rank every ticker of ``df`` on each classified ratio column.

    Columns listed in ``high_ratios`` are ranked so that the largest value gets rank 1;
    columns listed in ``low_ratios`` so that the smallest value gets rank 1. A column that
    appears in both lists is treated as a high ratio, and columns in neither list are ignored.
    Ties and missing values follow the defaults of ``pandas.Series.rank``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per ticker (the index), one column per ratio.
    high_ratios : collection of str
        Column names for which a higher value is better.
    low_ratios : collection of str
        Column names for which a lower value is better.

    Returns
    -------
    tuple(dict, dict)
        ``(by_ratio, by_ticker)`` where ``by_ratio[ratio]`` maps ticker -> rank (ordered by
        rank, possibly containing NaN ranks) and ``by_ticker[ticker]`` maps ratio -> rank,
        omitting the ratios whose rank is NaN for that ticker.
    """
    per_ticker = {ticker: {} for ticker in df.index.to_list()}
    per_ratio = {}

    for ratio in df.columns.to_list():
        if ratio in high_ratios:
            best_is_largest = True
        elif ratio in low_ratios:
            best_is_largest = False
        else:
            # Unclassified column: it takes no part in the ranking.
            continue

        ranks = dict(df[ratio].rank(ascending=not best_is_largest).sort_values())
        per_ratio[ratio] = ranks

        # rank() numbers positions from 1 through n, so a stored rank is never 0. A final ranking
        # of 0 can therefore only mean that the company did not have any ratio at all.
        for ticker, ticker_ranks in per_ticker.items():
            position = ranks.get(ticker, 0)
            if np.isnan(position):
                continue
            ticker_ranks[ratio] = position

    return per_ratio, per_ticker

In [121]:
def compute_ranking(df, high_ratios, low_ratios, sort_by_ranking = True, all_dict=False):
    """Collapse the per-ratio ranks of each ticker into one average ranking value.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per ticker (the index), one column per ratio.
    high_ratios, low_ratios : collection of str
        Column names for which a higher / lower value is better; forwarded unchanged to
        ``compute_ranking_by_ratio_and_ticker``.
    sort_by_ranking, all_dict : bool
        Currently unused: they are only referenced by the disabled DataFrame variant noted below.

    Returns
    -------
    dict
        ticker -> mean of that ticker's available rank positions, or 0 when the ticker has no
        ranked ratio at all.
    """
    _, ranks_per_ticker = compute_ranking_by_ratio_and_ticker(df, high_ratios, low_ratios)

    # For certain years of displacement a company may have no ratio at all (per the original note:
    # it was not trading in the S&P 500 yet at that time). Such tickers get the sentinel value 0.
    ranking_dict = {
        ticker: (sum(ranks.values()) / len(ranks) if ranks else 0)
        for ticker, ranks in ranks_per_ticker.items()
    }

    # Disabled DataFrame variant, kept for reference. Re-enabling it would make the function work
    # with a ranking DataFrame (and use sort_by_ranking / all_dict) instead of the plain dict:
    #
    #   ranking = pd.DataFrame(ranking_dict.items(), columns=['ticker', 'ranking']).set_index('ticker')
    #
    #   if sort_by_ranking:
    #       ranking = ranking.sort_values('ranking', ascending=True)
    #
    #   if all_dict:
    #       return ranking_dict_by_ratio, ranking_dict_by_ticker, ranking

    return ranking_dict

----------

**COMPUTE RANKINGS FOR EACH SECTOR**

In [123]:
# One DataFrame per sector, with the 'sector' and 'industry' label columns removed.
sector_split_dict = {
    sector_name: df.loc[df["sector"] == sector_name].drop(columns=["sector", "industry"])
    for sector_name in df["sector"].unique()
}

In [124]:
# Rank the companies of each sector only against the other companies of that same sector.
rankings_by_sector = {
    sector_name: compute_ranking(sector_frame, high_ratios, low_ratios, True, False)
    for sector_name, sector_frame in sector_split_dict.items()
}

In [127]:
# Persist the per-sector ranking dictionaries so they can be reloaded without recomputing them.
with open('models_outputs/ranking_output/X_year_disp/rankings_by_sector_Xyear.pickle', mode='wb') as pickle_out:
    pickle.dump(rankings_by_sector, pickle_out)

**Retrieve the saved dictionary**

In [None]:
# Reload the pickled per-sector rankings into a fresh dictionary.
# NOTE: unpickling can execute arbitrary code -- only load pickle files that you created yourself.
rankings_by_sector_dict = {}
with open('models_outputs/ranking_output/X_year_disp/rankings_by_sector_Xyear.pickle', mode='rb') as pickle_in:
    rankings_by_sector_dict.update(pickle.load(pickle_in))

In [None]:
# Turn each sector's {ticker: ranking} dictionary into a one-column DataFrame indexed by ticker.
rankings_by_sector = {
    sector_name: pd.DataFrame.from_dict(sector_ranking, orient='index', columns=['ranking'])
    for sector_name, sector_ranking in rankings_by_sector_dict.items()
}

In [None]:
# Inspect the averaged rankings of one sector. Rank 1 is the best position for each ratio, so lower
# averages are better; a value of 0 is the sentinel for tickers that had no ranked ratio at all.
rankings_by_sector['Industrials']