In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt

from pandas.tseries.offsets import CustomBusinessDay

In [3]:
def org_columns(df):
    '''
    Tidy a wide, month-indexed table and carry quarter-end values forward.

    Steps, in order:
      1. Rebuild the column labels from the ``(.*)$`` capture of each
         existing label.
      2. Blank every row whose index label contains Jan, Feb, Apr, May,
         Jul, Aug, Oct or Nov, leaving only Mar/Jun/Sep/Dec rows populated.
      3. Forward-fill along the rows, at most two consecutive gaps, so each
         remaining observation is carried into the following two rows.
      4. Name the column axis ``"ticker"``.

    Note: all-NA rows or columns are NOT dropped by this function.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index and column labels are strings (the ``.str``
        accessor is used on both). It is left untouched; the work is done
        on a copy.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same shape as ``df``.
    '''
    # Work on a copy so re-running a cell never sees a half-modified input.
    out = df.copy()
    out.columns = df.columns.str.extract(r'(.*)$')[0]

    # na=False keeps the mask strictly boolean even if an index label is missing.
    off_quarter = out.index.str.contains(
        "Jan|Feb|Apr|May|Jul|Aug|Oct|Nov", na=False
    )
    # np.nan (lower case): the np.NaN alias no longer exists in NumPy 2.x.
    out.loc[off_quarter, :] = np.nan

    # DataFrame.ffill replaces the deprecated fillna(method='ffill').
    out = out.ffill(limit=2)
    out.columns.name = "ticker"
    return out

In [42]:
# 1) Return on Assets - roa
# F-score criterion 1: Return on Assets (ROA%) for the latest FY > 0 scores 1.
roa_ati = pd.read_excel(
    '../../data/fscore/roa_mensal_ativas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
)
# The "canceladas" sheet carries Jan-86/Feb-86 rows that are discarded
# (presumably so both frames cover the same dates - confirm against the files).
roa_can = pd.read_excel(
    '../../data/fscore/roa_mensal_canceladas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
).drop(index=["Jan-86", "Feb-86"])
# Side-by-side join of both ticker sets, cleaned by org_columns,
# then reduced to the rows whose label contains "Dec".
roa = org_columns(pd.concat([roa_ati, roa_can], axis=1, sort=False))
roa = roa.loc[roa.index.str.contains("Dec")]

In [44]:
# 2) Operating Cash Flow - ocf
# F-score criterion 2: Operating Cash Flow for the latest FY greater than
# Operating Cash Flow one year earlier (FY-1) scores 1.
#
# Lesson learned: before writing a loader for a file, inspect the file by eye
# (including its boundaries); the fixes end up being ad hoc. In this cell the
# Jan-86/Feb-86 rows are removed from the "ativas" frame, whereas the other
# loading cells remove them from the "canceladas" frame.
ocf_ati = pd.read_excel(
    '../../data/fscore/cash_op_mensal_ativas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
).drop(index=["Jan-86", "Feb-86"])
ocf_can = pd.read_excel(
    '../../data/fscore/cash_op_mensal_canceladas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
)
# Join both ticker sets column-wise, clean, keep only "Dec" rows.
ocf = org_columns(pd.concat([ocf_ati, ocf_can], axis=1, sort=False))
ocf = ocf.loc[ocf.index.str.contains("Dec")]

In [None]:
# 3) Return on Assets (ROA%) for latest FY is > FY prior = Score 1
# 3) Change in Return on Assets
# The data already loaded above is enough for this criterion

In [None]:
# 4) Operating Cash Flow for latest FY is > Net Income (before extraordinary items) for latest FY = Score 1
# 4) Accruals - accs
# In practice, accruals can be estimated as
# Accruals = NI - Operating Cash Flows
# A negative accrual would be a good sign, because of quality of earnings
# Accruals themselves are not used here; the test is simply whether Operating Cash Flow > NI
# Operating Cash Flow is already loaded; NI is still missing

In [46]:
# Net Income - ni
ni_ati = pd.read_excel(
    '../../data/fscore/net_income_mensal_ativas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
)
# Jan-86/Feb-86 are discarded from the "canceladas" frame before joining.
ni_can = pd.read_excel(
    '../../data/fscore/net_income_mensal_canceladas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
).drop(index=["Jan-86", "Feb-86"])
# Column-wise join, org_columns clean-up, then only the "Dec" rows.
ni = org_columns(pd.concat([ni_ati, ni_can], axis=1, sort=False))
ni = ni.loc[ni.index.str.contains("Dec")]

In [None]:
# 5) Long-term Debt to asset growth over 1 year is < 0 = Score 1
# This requires Long-term Debt and Total Assets

In [48]:
# Long-term Debt - ltd
ltd_ati = pd.read_excel(
    '../../data/fscore/total_debt_LT_mensal_ativas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
)
# Jan-86/Feb-86 are discarded from the "canceladas" frame before joining.
ltd_can = pd.read_excel(
    '../../data/fscore/total_debt_LT_mensal_canceladas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
).drop(index=["Jan-86", "Feb-86"])
# Column-wise join, org_columns clean-up, then only the "Dec" rows.
ltd = org_columns(pd.concat([ltd_ati, ltd_can], axis=1, sort=False))
ltd = ltd.loc[ltd.index.str.contains("Dec")]

In [50]:
# Total Assets - ta
ta_ati = pd.read_excel(
    '../../data/fscore/total_assets_mensal_ativas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
)
# NOTE(review): this file name is spelled "cancelandas", unlike the other
# "canceladas" workbooks - confirm it matches the file on disk.
# Jan-86/Feb-86 are discarded from this frame before joining.
ta_can = pd.read_excel(
    '../../data/fscore/total_assets_mensal_cancelandas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
).drop(index=["Jan-86", "Feb-86"])
# Column-wise join, org_columns clean-up, then only the "Dec" rows.
ta = org_columns(pd.concat([ta_ati, ta_can], axis=1, sort=False))
ta = ta.loc[ta.index.str.contains("Dec")]

In [51]:
# Long-term Debt to Total Assets ratio - ltdta
# Element-wise division; pandas aligns the two frames on index and columns.
ltdta = ltd / ta

In [None]:
# 6) Current Ratio Growth
# 6) Current Ratio growth over 1 year is > 0 = Score 1

In [54]:
# Current Ratio - cr
cr_ati = pd.read_excel(
    '../../data/fscore/current_ratio_mensal_ativas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
)
# Jan-86/Feb-86 are discarded from the "canceladas" frame before joining.
cr_can = pd.read_excel(
    '../../data/fscore/current_ratio_mensal_canceladas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
).drop(index=["Jan-86", "Feb-86"])
# Column-wise join, org_columns clean-up, then only the "Dec" rows.
cr = org_columns(pd.concat([cr_ati, cr_can], axis=1, sort=False))
cr = cr.loc[cr.index.str.contains("Dec")]

In [56]:
# 7) Number of Shares - ns
# F-score criterion 7: no new shares issued in the preceding FY scores 1.
ns_ati = pd.read_excel(
    '../../data/fscore/number_shares_mensal_ativas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
)
# Jan-86/Feb-86 are discarded from the "canceladas" frame before joining.
ns_can = pd.read_excel(
    '../../data/fscore/number_shares_mensal_canceladas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
).drop(index=["Jan-86", "Feb-86"])
ns = pd.concat([ns_ati, ns_can], axis=1, sort=False)
# Unlike the other tables, this one is not passed through org_columns:
# only the column labels are rebuilt and named; no rows are blanked or filled.
ns.columns = ns.columns.str.extract('(.*)$')[0]
ns.columns.name = "ticker"
# Remove the "Oct-19" row explicitly, then keep only the "Dec" rows.
ns = ns.drop(index=["Oct-19"])
ns = ns.loc[ns.index.str.contains("Dec")]

In [58]:
# 8) Gross Margin - gm
# F-score criterion 8: Gross Margin growth over 1 year > 0 scores 1.
gm_ati = pd.read_excel(
    '../../data/fscore/gross_margin_mensal_ativas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
)
# Jan-86/Feb-86 are discarded from the "canceladas" frame before joining.
gm_can = pd.read_excel(
    '../../data/fscore/gross_margin_mensal_canceladas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
).drop(index=["Jan-86", "Feb-86"])
# Column-wise join, org_columns clean-up, then only the "Dec" rows.
gm = org_columns(pd.concat([gm_ati, gm_can], axis=1, sort=False))
gm = gm.loc[gm.index.str.contains("Dec")]

In [61]:
# 9) Asset Turnover - at
# F-score criterion 9: Asset Turnover growth over 1 year > 0 scores 1.
at_ati = pd.read_excel(
    '../../data/fscore/asset_turnover_mensal_ativas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
)
# Jan-86/Feb-86 are discarded from the "canceladas" frame before joining.
at_can = pd.read_excel(
    '../../data/fscore/asset_turnover_mensal_canceladas.xlsx',
    header=1,
    skiprows=[1, 2],
    index_col=0,
    na_values='-',
).drop(index=["Jan-86", "Feb-86"])
# Column-wise join, org_columns clean-up, then only the "Dec" rows.
at = org_columns(pd.concat([at_ati, at_can], axis=1, sort=False))
at = at.loc[at.index.str.contains("Dec")]

In [62]:
# Display the December-only asset-turnover table built in the previous cell.
at

ticker,QVQP3B,ABCB4,EALT3,EALT4,ADHM3,TIET3,TIET4,TIET11,AFLT3,ALEF3B,...,WHMT3,WISA3,WISA4,WWOW3,ILMD3,ILMD4,ESTC4,ESTC11,OPZI3B,ZIVI4
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Dec-86,,,,,,,,,,,...,1.026447,,,,,,,,,0.881766
Dec-87,,,,,,,,,,,...,0.767047,,,,,,,,,0.703673
Dec-88,,,,,,,,,,,...,1.576458,,,,,,,,,1.2374
Dec-89,,,,,,,,,,,...,1.419465,,,,,,,,,1.231946
Dec-90,,,,,,,,,,,...,1.493137,,,,,,,,,1.244542
Dec-91,,,,,,,,,,,...,0.775587,,,,,,,,,0.737726
Dec-92,,,,,,,,,,,...,0.976103,,,,,,,,,0.761604
Dec-93,,,,,,,,,,,...,0.926292,,,,,,,,,1.051878
Dec-94,,,,,,,,,,,...,1.092053,1.18726,1.18726,,,,,,,1.051887
Dec-95,,,,,,,,,,,...,0.849781,,,,,,,,,1.218568


In [25]:
# Sanity check on missing data: total number of cells in ltd, followed by the
# count of missing cells in ltd, ta and their ratio ltdta.
print("total: ", ltd.size)
print("ltd:", ltd.isna().sum().sum())
print("ta:", ta.isna().sum().sum())
print("ltdta:", ltdta.isna().sum().sum())

total:  521482
ltd: 340794
ta: 325415
ltdta: 340914


In [None]:
# An open question is whether the portfolio will be rebalanced quarter by quarter or month by month
# For now this is run only at year end

In [41]:
# Show the net-income rows whose index label contains "Dec".
# (org_columns is where the Jan/Feb/Apr/May/Jul/Aug/Oct/Nov rows get blanked.)
ni.loc[ni.index.str.contains("Dec")]

ticker,QVQP3B,ABCB4,EALT3,EALT4,ADHM3,TIET3,TIET4,TIET11,AFLT3,ALEF3B,...,WHMT3,WISA3,WISA4,WWOW3,ILMD3,ILMD4,ESTC4,ESTC11,OPZI3B,ZIVI4
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Dec-86,,,,,,,,,,,...,0.000257,,,,,,,,,9e-06
Dec-87,,,,,,,,,,,...,0.001104,,,,,,,,,2.1e-05
Dec-88,,,,,,,,,,,...,0.010167,,,,,,,,,0.000261
Dec-89,,,,,,,,,,,...,0.226478,,,,,,,,,0.01896
Dec-90,,,,,,,,,,,...,2.971217,,,,,,,,,-0.3776
Dec-91,,,,,,,,,,,...,7.593085,,,,,,,,,-0.029895
Dec-92,,,,,,,,,,,...,247.010545,,,,,,,,,-47.258039
Dec-93,,,,,,,,,,,...,7761.942545,,,,,,,,,-2703.059273
Dec-94,,,,,,,,,,,...,78311.0,4147.804,4147.804,,,,,,,-10348.0
Dec-95,,,,,,,,,,,...,57427.0,,,,,,,,,-23254.0
