In [2]:
import numpy as np
import pandas as pd

In [3]:
# Column names removed from the loaded dataset to leave only the target columns
# (used as `dataset.drop(columns=DATA)`). The first entry, 'Unnamed: 0', is the
# same column the notebook refers to as DATE; the remaining entries are the
# feature columns (income-statement, balance-sheet, cash-flow items and ratios,
# judging by their names).
DATA = ['Unnamed: 0', 'revenue','cost-goods-sold','gross-profit','research-development-expenses','selling-general-administrative-expenses','operating-expenses',
'operating-income','total-non-operating-income-expense','pre-tax-income','total-provision-income-taxes','income-after-taxes','income-from-continuous-operations',
'income-from-discontinued-operations','net-income','ebitda','ebit','basic-shares-outstanding','shares-outstanding','eps-basic-net-earnings-per-share',
'eps-earnings-per-share-diluted','cash-on-hand','receivables-total','inventory','other-current-assets','total-current-assets','net-property-plant-equipment',
'long-term-investments','goodwill-intangible-assets-total','other-long-term-assets','total-long-term-assets','total-assets','total-current-liabilities','long-term-debt',
'other-non-current-liabilities','total-long-term-liabilities','total-liabilities','common-stock-net','retained-earnings-accumulated-deficit','comprehensive-income',
'total-share-holder-equity','total-liabilities-share-holders-equity','net-income-loss','total-depreciation-amortization-cash-flow','other-non-cash-items','total-non-cash-items',
'change-in-accounts-receivable','change-in-inventories','change-in-accounts-payable','change-in-assets-liabilities','total-change-in-assets-liabilities',
'cash-flow-from-operating-activities','net-change-in-property-plant-equipment','net-change-in-intangible-assets','net-acquisitions-divestitures','investing-activities-other',
'cash-flow-from-investing-activities','net-long-term-debt','net-current-debt','debt-issuance-retirement-net-total','net-common-equity-issued-repurchased',
'net-total-equity-issued-repurchased','total-common-preferred-stock-dividends-paid','financial-activities-other','cash-flow-from-financial-activities',
'net-cash-flow','stock-based-compensation','common-stock-dividends-paid','current-ratio','long-term-debt-capital','debt-equity-ratio','gross-margin',
'operating-margin','ebit-margin','pre-tax-profit-margin','net-profit-margin','asset-turnover','inventory-turnover','receiveable-turnover','days-sales-in-receivables',
'roe','return-on-tangible-equity','roa','roi','book-value-per-share','operating-cash-flow-per-share','free-cash-flow-per-share','net-change-in-short-term-investments',
'net-change-in-long-term-investments','net-change-in-investments-total','other-operating-income-expenses','pre-paid-expenses','other-share-holders-equity','other-income',
'ebitda-margin']

# Target columns: a class label and its underlying value, for return and for risk.
REAL_RETURN_CLASS, REAL_RETURN = "RealReturnClass", "RealReturn"
RISK_CLASS, RISK = 'RiskClass', "Risk"

# String labels found in the class columns; they are encoded numerically after loading.
HIGH, MEDIUM, LOW = 'high', 'medium', 'low'

# Name of the date/index column; it is dropped from the feature matrix.
DATE = 'Unnamed: 0'

# Selects which processed dataset file is loaded (it is part of the file name).
N_PERIODS = 2

# Tag used as the prefix of the exported ranking file names.
FEATURE_SELECTION = "SPEARMAN"

DATASET_PATH = f'../new_dataset/process_final_{N_PERIODS}.csv'

In [4]:
# Load the processed dataset and make it fully numeric in one idempotent chain:
#   1. missing values become 0.0 (`fillna` replaces the old `np.NaN` lookup,
#      an alias that no longer exists in NumPy >= 2.0);
#   2. the class labels are encoded as low=0.0 < medium=1.0 < high=2.0 with a
#      single frame-wide mapping (same effect as three sequential replaces);
#   3. `infer_objects()` turns the former string columns into real float
#      columns instead of relying on `replace` to downcast them implicitly,
#      which newer pandas versions no longer do.
dataset = (
    pd.read_csv(DATASET_PATH)
    .fillna(0.0)
    .replace({HIGH: 2.0, MEDIUM: 1.0, LOW: 0.0})
    .infer_objects()
)

# Feature matrix: everything except the four target columns and the date column.
dataset_X = dataset.drop(columns=[REAL_RETURN_CLASS, REAL_RETURN, RISK_CLASS, RISK, DATE])
# Target frame: whatever remains once every column listed in DATA is removed.
dataset_y = dataset.drop(columns=DATA)

In [5]:
def pearson(classType, X=None, y=None, method='spearman'):
  """Rank feature columns by absolute correlation with one target column.

  Despite its (historical) name, the default correlation is Spearman's rank
  correlation, not Pearson's; the name is kept so existing calls keep working.

  Args:
    classType: name of the target column in `y` to correlate against.
    X: feature DataFrame; defaults to the notebook-level `dataset_X`.
    y: target DataFrame; defaults to the notebook-level `dataset_y`.
    method: correlation method forwarded to `DataFrame.corrwith`.

  Returns:
    A list of `(column_name, abs_correlation)` tuples sorted from strongest to
    weakest. Columns whose correlation is undefined (NaN, e.g. a constant
    column) are placed at the end instead of landing at arbitrary positions.
  """
  features = dataset_X if X is None else X
  targets = dataset_y if y is None else y

  ranking = features.corrwith(targets[classType], method=method).abs()

  # NaN never compares as smaller or larger, so a plain sorted() leaves the
  # order undefined once a NaN is present. sort_values pins NaNs to the end;
  # the stable sort keeps tied columns in their original column order.
  ordered = ranking.sort_values(ascending=False, kind='stable', na_position='last')

  # Pair names from the result's own index so names and scores stay aligned
  # even if corrwith skipped a column.
  return list(zip(ordered.index, ordered.values))

In [6]:
# Rank all feature columns against the real-return class label; the bare name
# on the last line makes the notebook display the ranking.
real_return_ranking = pearson(REAL_RETURN_CLASS)
real_return_ranking

[('roi', 0.8597261749198114),
 ('roa', 0.7932860607232569),
 ('roe', 0.6536476450761809),
 ('asset-turnover', 0.5284280712391393),
 ('eps-basic-net-earnings-per-share', 0.420398105052518),
 ('eps-earnings-per-share-diluted', 0.4176253930289296),
 ('pre-tax-profit-margin', 0.37476551940366004),
 ('net-profit-margin', 0.3561311914213076),
 ('income-after-taxes', 0.3511411665664389),
 ('income-from-continuous-operations', 0.3494747788315793),
 ('net-income', 0.3472563892209999),
 ('pre-tax-income', 0.3463709116716013),
 ('net-income-loss', 0.3340901388115314),
 ('ebit', 0.3279711900722048),
 ('long-term-debt', 0.32110323791764567),
 ('operating-margin', 0.3118644186933811),
 ('ebit-margin', 0.3097832250359871),
 ('total-provision-income-taxes', 0.29565097936790374),
 ('book-value-per-share', 0.28905156558515266),
 ('total-long-term-assets', 0.28858781337218636),
 ('net-total-equity-issued-repurchased', 0.2850969585627324),
 ('net-common-equity-issued-repurchased', 0.28303559363255104),
 (

In [7]:
def writeFeature(classType, type, mode='a'):
  """Write the feature ranking for one target column to a text file.

  The ranking comes from `pearson(classType)`; each `(feature, score)` tuple
  is written on its own line to `./files/{FEATURE_SELECTION}_{type}.txt`.

  Args:
    classType: target column name passed to `pearson`.
    type: label used in the output file name (name kept for compatibility
      even though it shadows the builtin).
    mode: file open mode. Defaults to 'a' (append), the original behaviour.
      NOTE: with 'a', re-running the notebook appends a second copy of the
      ranking; pass 'w' to overwrite instead.
  """
  import os

  # Compute the ranking before touching the file system, so a failure here
  # cannot leave an open handle or a freshly created empty file behind.
  result = pearson(classType)

  path = f'./files/{FEATURE_SELECTION}_{type}.txt'
  # open() does not create missing directories; make sure ./files exists.
  os.makedirs(os.path.dirname(path), exist_ok=True)

  # The context manager closes the file even if a write raises.
  with open(path, mode) as out:
    out.writelines(f"{feature}\n" for feature in result)

In [8]:
# Export one ranking file per target: the class column supplies the
# correlation target, the paired label goes into the output file name.
for class_column, label in ((REAL_RETURN_CLASS, REAL_RETURN), (RISK_CLASS, RISK)):
  writeFeature(class_column, label)