In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt

from pandas.tseries.offsets import CustomBusinessDay

In [3]:
def org_columns(df):
    '''
    Normalise a raw monthly frame read from one of the spreadsheets.

    Steps performed:
      1. Each column label is reduced to its last line and the column axis
         is named "ticker".
      2. Every row whose index label mentions Jan, Feb, Apr, May, Jul, Aug,
         Oct or Nov is blanked (set to NaN).
      3. Each column is forward-filled by at most two consecutive rows, so
         the blanked rows repeat the latest preceding non-missing value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string index labels (e.g. "Jan-86") and string column
        labels.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is left unmodified.
    '''
    # Work on a copy so the caller's frame is never altered as a side effect.
    out = df.copy()

    # Without re.MULTILINE, '(.*)$' can only match on the final line of a
    # label, so a multi-line header collapses to its last line.
    out.columns = out.columns.str.extract('(.*)$')[0]
    out.columns.name = "ticker"

    # na=False keeps the mask strictly boolean even if an index label is missing.
    off_quarter = out.index.str.contains("Jan|Feb|Apr|May|Jul|Aug|Oct|Nov", na=False)
    out.loc[off_quarter] = np.nan

    # DataFrame.ffill replaces the deprecated fillna(method='ffill').
    return out.ffill(limit=2)

In [42]:
# 1) Return on Assets (roa)
# Scoring rule: ROA% for the latest FY > 0 => score 1.
# The *_ativas and *_canceladas workbooks are read with identical options.
roa_ati, roa_can = (
    pd.read_excel(path, skiprows=[1, 2], header=1, index_col=0, na_values='-')
    for path in ('../../data/fscore/roa_mensal_ativas.xlsx',
                 '../../data/fscore/roa_mensal_canceladas.xlsx')
)
# Drop the "Jan-86" and "Feb-86" rows from the *_canceladas frame before joining.
roa_can = roa_can.drop(["Jan-86", "Feb-86"])
# Join the two frames side by side, then normalise with org_columns.
roa = org_columns(pd.concat([roa_ati, roa_can], axis=1, sort=False))

In [44]:
# 2) Operating Cash Flow (ocf)
# Scoring rule: Operating Cash Flow for the latest FY > Operating Cash Flow
# from one year earlier (FY-1) => score 1.
# Lesson learned: before writing a function that opens a file, inspect the file
# by eye (its boundaries, etc.). The fixes end up being ad hoc.
ocf_ati, ocf_can = (
    pd.read_excel(path, skiprows=[1, 2], header=1, index_col=0, na_values='-')
    for path in ('../../data/fscore/cash_op_mensal_ativas.xlsx',
                 '../../data/fscore/cash_op_mensal_canceladas.xlsx')
)
# NOTE(review): here the "Jan-86"/"Feb-86" rows are dropped from the *_ativas
# frame (ocf_ati), not the *_canceladas one - confirm against the workbooks.
ocf_ati = ocf_ati.drop(["Jan-86", "Feb-86"])
# Join the two frames side by side, then normalise with org_columns.
ocf = org_columns(pd.concat([ocf_ati, ocf_can], axis=1, sort=False))

In [None]:
# 3) Return on Assets (ROA%) for latest FY is > FY prior = Score 1
# 3) Change in Return on Assets
# No new data is needed: reuse the ROA data already loaded above.

In [None]:
# 4) Operating Cash Flow for latest FY is > Net Income (before extraordinary items) for latest FY = Score 1
# 4) Accruals - accs
# Accruals can be estimated as
# Accruals = NI - Operating Cash Flow
# A negative accrual is a good sign for the quality of earnings.
# Accruals are not computed here; the check is simply Operating Cash Flow > NI.
# Operating Cash Flow is already loaded; Net Income (NI) is still missing.

In [46]:
# Net Income (ni)
# The *_ativas and *_canceladas workbooks are read with identical options.
ni_ati, ni_can = (
    pd.read_excel(path, skiprows=[1, 2], header=1, index_col=0, na_values='-')
    for path in ('../../data/fscore/net_income_mensal_ativas.xlsx',
                 '../../data/fscore/net_income_mensal_canceladas.xlsx')
)
# Drop the "Jan-86" and "Feb-86" rows from the *_canceladas frame before joining.
ni_can = ni_can.drop(["Jan-86", "Feb-86"])
# Join the two frames side by side, then normalise with org_columns.
ni = org_columns(pd.concat([ni_ati, ni_can], axis=1, sort=False))

In [None]:
# 5) Long-term Debt to asset growth over 1 year is < 0 = Score 1
# Requires both Long-term Debt and Total Assets.

In [48]:
# Long-term Debt (ltd)
# The *_ativas and *_canceladas workbooks are read with identical options.
ltd_ati, ltd_can = (
    pd.read_excel(path, skiprows=[1, 2], header=1, index_col=0, na_values='-')
    for path in ('../../data/fscore/total_debt_LT_mensal_ativas.xlsx',
                 '../../data/fscore/total_debt_LT_mensal_canceladas.xlsx')
)
# Drop the "Jan-86" and "Feb-86" rows from the *_canceladas frame before joining.
ltd_can = ltd_can.drop(["Jan-86", "Feb-86"])
# Join the two frames side by side, then normalise with org_columns.
ltd = org_columns(pd.concat([ltd_ati, ltd_can], axis=1, sort=False))

In [50]:
# Total Assets (ta)
# Both workbooks are read with identical options.
# NOTE(review): the second path is spelled "cancelandas" - confirm it matches
# the file name on disk.
ta_ati, ta_can = (
    pd.read_excel(path, skiprows=[1, 2], header=1, index_col=0, na_values='-')
    for path in ('../../data/fscore/total_assets_mensal_ativas.xlsx',
                 '../../data/fscore/total_assets_mensal_cancelandas.xlsx')
)
# Drop the "Jan-86" and "Feb-86" rows from the second frame before joining.
ta_can = ta_can.drop(["Jan-86", "Feb-86"])
# Join the two frames side by side, then normalise with org_columns.
ta = org_columns(pd.concat([ta_ati, ta_can], axis=1, sort=False))

In [51]:
# Long-term Debt to Total Assets ratio: element-wise division of the two
# frames, aligned on index and column labels.
ltdta = ltd / ta

In [None]:
# 6) Current Ratio Growth
# 6) Current Ratio growth over 1 year is > 0 = Score 1

In [54]:
# Current Ratio (cr)
# The *_ativas and *_canceladas workbooks are read with identical options.
cr_ati, cr_can = (
    pd.read_excel(path, skiprows=[1, 2], header=1, index_col=0, na_values='-')
    for path in ('../../data/fscore/current_ratio_mensal_ativas.xlsx',
                 '../../data/fscore/current_ratio_mensal_canceladas.xlsx')
)
# Drop the "Jan-86" and "Feb-86" rows from the *_canceladas frame before joining.
cr_can = cr_can.drop(["Jan-86", "Feb-86"])
# Join the two frames side by side, then normalise with org_columns.
cr = org_columns(pd.concat([cr_ati, cr_can], axis=1, sort=False))

In [56]:
# 7) Number of Shares (ns)
# Scoring rule: no new shares issued in the preceding FY => score 1.
# The *_ativas and *_canceladas workbooks are read with identical options.
ns_ati, ns_can = (
    pd.read_excel(path, skiprows=[1, 2], header=1, index_col=0, na_values='-')
    for path in ('../../data/fscore/number_shares_mensal_ativas.xlsx',
                 '../../data/fscore/number_shares_mensal_canceladas.xlsx')
)
# Drop the "Jan-86" and "Feb-86" rows from the *_canceladas frame before joining.
ns_can = ns_can.drop(["Jan-86", "Feb-86"])
ns = pd.concat([ns_ati, ns_can], axis=1, sort=False)
# This cell does not go through org_columns: only the column labels are
# reduced to their last line and the column axis is named "ticker".
ns.columns = ns.columns.str.extract('(.*)$')[0]
ns.columns.name = "ticker"
# Remove the "Oct-19" row from the joined frame.
ns = ns.drop(["Oct-19"])

In [58]:
# 8) Gross Margin (gm)
# Scoring rule: Gross Margin growth over 1 year > 0 => score 1.
# The *_ativas and *_canceladas workbooks are read with identical options.
gm_ati, gm_can = (
    pd.read_excel(path, skiprows=[1, 2], header=1, index_col=0, na_values='-')
    for path in ('../../data/fscore/gross_margin_mensal_ativas.xlsx',
                 '../../data/fscore/gross_margin_mensal_canceladas.xlsx')
)
# Drop the "Jan-86" and "Feb-86" rows from the *_canceladas frame before joining.
gm_can = gm_can.drop(["Jan-86", "Feb-86"])
# Join the two frames side by side, then normalise with org_columns.
gm = org_columns(pd.concat([gm_ati, gm_can], axis=1, sort=False))

In [61]:
# 9) Asset Turnover (at)
# Scoring rule: Asset Turnover growth over 1 year > 0 => score 1.
# The *_ativas and *_canceladas workbooks are read with identical options.
at_ati, at_can = (
    pd.read_excel(path, skiprows=[1, 2], header=1, index_col=0, na_values='-')
    for path in ('../../data/fscore/asset_turnover_mensal_ativas.xlsx',
                 '../../data/fscore/asset_turnover_mensal_canceladas.xlsx')
)
# Drop the "Jan-86" and "Feb-86" rows from the *_canceladas frame before joining.
at_can = at_can.drop(["Jan-86", "Feb-86"])
# Join the two frames side by side, then normalise with org_columns.
at = org_columns(pd.concat([at_ati, at_can], axis=1, sort=False))

In [25]:
# Sanity check: total number of cells in ltd, followed by the number of
# missing cells in ltd, ta and their ratio ltdta.
print("total: ", ltd.size)
print("ltd:", ltd.isna().sum().sum())
print("ta:", ta.isna().sum().sum())
print("ltdta:", ltdta.isna().sum().sum())

total:  521482
ltd: 340794
ta: 325415
ltdta: 340914


In [None]:
# An open question is whether the portfolio will be rebalanced quarterly or monthly.
# For now it is run only at year end.