In [None]:
# O objetivo desse estudo é aprender manipulação de datas. Eu já tinha feito alguma coisa com dias úteis na parte de testes
# do hedgepanel. Dá uma olhada. Esses estudos se complementam

In [12]:
import pandas as pd
import numpy as np
import datetime as dt

In [None]:
# Estarei baseando meus estudos em
# https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html para pandas e
# https://docs.scipy.org/doc/numpy/reference/arrays.datetime.html para numpy
# A questão de datas é misturado pandas com numpy. Pandas usa os tipos básicos do numpy datetime64 and timedelta64 dtypes
# e expande suas funcionalidades
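# lembrar que o 64 vem do armazenamento em inteiros de 64 bits do numpy (datetime64 / timedelta64); os tipos escalares
# correspondentes em pandas são pd.Timestamp e pd.Timedelta (respeitando as maiúsculas e minúsculas)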

In [14]:
# pd.date_range builds an array of dates: the container is a pd.DatetimeIndex, but (as the dtype in the
# output shows) the values are stored as datetime64.
# In numpy a date array is created with np.arange('2005-02', '2005-03', dtype='datetime64[D]'); in pandas
# the author believes a DatetimeIndex has to be created instead.
idx = pd.date_range('2018-01-01', periods=5, freq='H')
idx
# Presumably each element of this index could also be created on its own with pd.Timestamp.

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00', '2018-01-01 03:00:00',
               '2018-01-01 04:00:00'],
              dtype='datetime64[ns]', freq='H')

In [22]:
# This is how an array of dates is created in numpy; the author notes there is not much you can do with
# the bare array.
# These are dates only (unit [D]), not timestamps (date + time).
x = np.arange('2005-02', '2005-03', dtype='datetime64[D]')
x

array(['2005-02-01', '2005-02-02', '2005-02-03', '2005-02-04',
       '2005-02-05', '2005-02-06', '2005-02-07', '2005-02-08',
       '2005-02-09', '2005-02-10', '2005-02-11', '2005-02-12',
       '2005-02-13', '2005-02-14', '2005-02-15', '2005-02-16',
       '2005-02-17', '2005-02-18', '2005-02-19', '2005-02-20',
       '2005-02-21', '2005-02-22', '2005-02-23', '2005-02-24',
       '2005-02-25', '2005-02-26', '2005-02-27', '2005-02-28'],
      dtype='datetime64[D]')

In [24]:
print(len(idx))
range(len(idx))
# What is the difference between range() and np.arange? arange comes from numpy, range is a Python builtin.
# arange probably resorts to a native implementation and might therefore be faster. On the other hand, arange
# returns a full array, which occupies memory, so there might be an overhead. Python 3's range is a lazy
# sequence object (not a generator): it computes values on demand and does not hold them all in memory.

5


range(0, 5)

In [38]:
# Resampling is straightforward.
# A pd.date_range is usually used as the index of a pd.Series or pd.DataFrame, but it can also be used as
# the data itself.
# Per the author's notes, Timestamps used as an index are coerced to a DatetimeIndex; something similar
# happens with Period.
ts = pd.Series(range(len(idx)), index=idx)
print(ts)
# Without an aggregation, ts.resample only returns a Resampler object
x = ts.resample('2H')
# An aggregation/fill method still has to be chosen: mean, ffill, sum or var of the grouped elements, etc.
x.mean()

2018-01-01 00:00:00    0
2018-01-01 01:00:00    1
2018-01-01 02:00:00    2
2018-01-01 03:00:00    3
2018-01-01 04:00:00    4
Freq: H, dtype: int64


2018-01-01 00:00:00    0.5
2018-01-01 02:00:00    2.5
2018-01-01 04:00:00    4.0
Freq: 2H, dtype: float64

In [None]:
# Pandas geralmente usa 4 conceitos de tempo:
# 1) Date times (pd.Timestamp)
# 2) Time deltas (pd.Timedelta)
# 3) Time spans (pd.Period)
# 4) Date offsets (pd.DateOffset)
# Notar como o nome de Time spans é Period

# 1 2 3 4 5 6 7 8 9
#       x            timestamp 4
#   x - - - - x      Period de 2 a 7

# https://www.geeksforgeeks.org/using-timedelta-and-period-to-create-datetime-based-indexes-in-pandas/
# um timestamp é um único ponto no tempo; um time period (span) abrange vários timestamps
# um time delta é bem exato até nos segundos. Um date offset varia de acordo com horário de verão, dias úteis, etc..

In [45]:
# Contrast an absolute Timedelta with a business-day offset, starting from a Friday.
friday = pd.Timestamp('2018-01-05')
friday.day_name() # returns 'Friday'
saturday = friday + pd.Timedelta('1 day') # returns Timestamp('2018-01-06 00:00:00')
monday = friday + pd.offsets.BDay() # adds 1 business day and returns Timestamp('2018-01-08 00:00:00')
monday

Timestamp('2018-01-08 00:00:00')

In [None]:
# Para converter strings e afins para timestamp, usar pd.to_datetime. Dá também para usar o construtor pd.DatetimeIndex. Esses
# métodos suportam format='%Y/%m/%d'. Suportam também o formato de tempo UNIX conhecido como integer or float epoch times

In [None]:
# pandas can generate a range of timestamps between two endpoints.
# `start` and `end` were never defined anywhere in the notebook, so this cell raised NameError on a
# fresh kernel; example endpoints are defined here so the cell runs on its own.
start, end = '2011-01-01', '2012-01-01'
index = pd.date_range(start, end) # calendar days, daily frequency 'D'
index = pd.bdate_range(start, end) # business days, business-day frequency 'B'
# date_range also accepts freq=, which is very flexible; it may solve the "business end of month" problem
# with freq='BM'
# periods=20 can be passed to generate 20 periods between start and end

In [None]:
# Em pandas também temos a opção de gerar ranges usando o parâmetro custom, que permite usar weekmask e holidays. A frequência
# fica como freq="C"

Date Offset	Frequency String	Description
DateOffset	None	Generic offset class, defaults to 1 calendar day
BDay or BusinessDay	'B'	business day (weekday)
CDay or CustomBusinessDay	'C'	custom business day
Week	'W'	one week, optionally anchored on a day of the week
WeekOfMonth	'WOM'	the x-th day of the y-th week of each month
LastWeekOfMonth	'LWOM'	the x-th day of the last week of each month
MonthEnd	'M'	calendar month end
MonthBegin	'MS'	calendar month begin
BMonthEnd or BusinessMonthEnd	'BM'	business month end
BMonthBegin or BusinessMonthBegin	'BMS'	business month begin
CBMonthEnd or CustomBusinessMonthEnd	'CBM'	custom business month end
CBMonthBegin or CustomBusinessMonthBegin	'CBMS'	custom business month begin
SemiMonthEnd	'SM'	15th (or other day_of_month) and calendar month end
SemiMonthBegin	'SMS'	15th (or other day_of_month) and calendar month begin
QuarterEnd	'Q'	calendar quarter end
QuarterBegin	'QS'	calendar quarter begin
BQuarterEnd	'BQ'	business quarter end
BQuarterBegin	'BQS'	business quarter begin
FY5253Quarter	'REQ'	retail (aka 52-53 week) quarter
YearEnd	'A'	calendar year end
YearBegin	'AS' or 'BYS'	calendar year begin
BYearEnd	'BA'	business year end
BYearBegin	'BAS'	business year begin
FY5253	'RE'	retail (aka 52-53 week) year
Easter	None	Easter holiday
BusinessHour	'BH'	business hour
CustomBusinessHour	'CBH'	custom business hour
Day	'D'	one absolute day
Hour	'H'	one hour
Minute	'T' or 'min'	one minute
Second	'S'	one second
Milli	'L' or 'ms'	one millisecond
Micro	'U' or 'us'	one microsecond
Nano	'N'	one nanosecond

In [None]:
# DatetimeIndex can be used like a regular index and offers all of its intelligent functionality like selection, slicing, etc.
# Tem maneiras muito inteligentes de parsear Datetimeindex tipo ts['10/31/2011':'12/31/2011']
# ts['2011'] seleciona tudo de 2011, ts['2011-6'] seleciona tudo de junho de 2011

# .dt accessor - Series has an accessor to succinctly return datetime like properties for the values of the Series, if it 
# is a datetime/period like Series. This will return a Series, indexed like the existing Series.
# This enables nice expressions like this: s[s.dt.day == 2]



In [None]:
# TIMEDELTA vs DATEOFFSET
# A Timedelta is an absolute duration; a DateOffset is relative to the calendar.
# That is why business-day arithmetic is done with DateOffset.

# a Timedelta day will always increment datetimes by 24 hours, while a DateOffset day will increment datetimes to the same
# time the next day whether a day represents 23, 24 or 25 hours due to daylight savings time

# This particular day contains a day light savings time transition
ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki')

# Respects absolute time
ts + pd.Timedelta(days=1)
# Out[142]: Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')

# Respects calendar time
ts + pd.DateOffset(days=1)
# Out[143]: Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki')

friday = pd.Timestamp('2018-01-05')

friday.day_name()
# Out[145]: 'Friday'

# Add 2 business days (Friday --> Tuesday)
two_business_days = 2 * pd.offsets.BDay()

# An offset is applied by addition, in either operand order. The original called
# two_business_days.apply(friday); DateOffset.apply was deprecated in pandas 1.1 and removed in 2.0,
# so addition is used to keep the cell runnable on current pandas.
two_business_days + friday
# Out[147]: Timestamp('2018-01-09 00:00:00')

friday + two_business_days
# Out[148]: Timestamp('2018-01-09 00:00:00')

(friday + two_business_days).day_name()
# Out[149]: 'Tuesday'

Most DateOffsets have associated frequency strings, or offset aliases, that can be passed into freq keyword arguments. The available date offsets and associated frequency strings can be found below:

Date Offset	Frequency String	Description
DateOffset	None	Generic offset class, defaults to 1 calendar day
BDay or BusinessDay	'B'	business day (weekday)
CDay or CustomBusinessDay	'C'	custom business day
Week	'W'	one week, optionally anchored on a day of the week
WeekOfMonth	'WOM'	the x-th day of the y-th week of each month
LastWeekOfMonth	'LWOM'	the x-th day of the last week of each month
MonthEnd	'M'	calendar month end
MonthBegin	'MS'	calendar month begin
BMonthEnd or BusinessMonthEnd	'BM'	business month end
BMonthBegin or BusinessMonthBegin	'BMS'	business month begin
CBMonthEnd or CustomBusinessMonthEnd	'CBM'	custom business month end
CBMonthBegin or CustomBusinessMonthBegin	'CBMS'	custom business month begin
SemiMonthEnd	'SM'	15th (or other day_of_month) and calendar month end
SemiMonthBegin	'SMS'	15th (or other day_of_month) and calendar month begin
QuarterEnd	'Q'	calendar quarter end
QuarterBegin	'QS'	calendar quarter begin
BQuarterEnd	'BQ'	business quarter end
BQuarterBegin	'BQS'	business quarter begin
FY5253Quarter	'REQ'	retail (aka 52-53 week) quarter
YearEnd	'A'	calendar year end
YearBegin	'AS' or 'BYS'	calendar year begin
BYearEnd	'BA'	business year end
BYearBegin	'BAS'	business year begin
FY5253	'RE'	retail (aka 52-53 week) year
Easter	None	Easter holiday
BusinessHour	'BH'	business hour
CustomBusinessHour	'CBH'	custom business hour
Day	'D'	one absolute day
Hour	'H'	one hour
Minute	'T' or 'min'	one minute
Second	'S'	one second
Milli	'L' or 'ms'	one millisecond
Micro	'U' or 'us'	one microsecond
Nano	'N'	one nanosecond

DateOffsets additionally have rollforward() and rollback() methods. É legal pois além de pular dias não úteis ele também faz offset de horas não úteis

In [49]:
# Using Custom Business Days
weekmask_egypt = 'Sun Mon Tue Wed Thu'

# They also observe International Workers' Day so let's
# add that for a couple of years
holidays = ['2012-05-01',
             dt.datetime(2013, 5, 1),
             np.datetime64('2014-05-01')]

# Once the CustomBusinessDay offset exists there are two ways to use it
bday_egypt = pd.offsets.CustomBusinessDay(holidays=holidays,
                                          weekmask=weekmask_egypt)

# Bound to `start_date`, not `dt`: the original rebound `dt` to a datetime instance, which shadowed the
# `datetime` module alias and made `dt.datetime(...)` above fail when this cell was run again.
start_date = dt.datetime(2013, 4, 30)

start_date + 2 * bday_egypt # 1) as an offset
#Out[189]: Timestamp('2013-05-05 00:00:00')

dts = pd.date_range(start_date, periods=5, freq=bday_egypt) # 2) as a range; the result has freq='C'

In [51]:
# dá pra programar holidays e o que acontece se eles caem no sábado e tem que puxar pra sexta (comum nos US?)
# dá pra botar frequência como de 3 em 3 dias úteis
# dr = pd.date_range('1/1/2010', periods=3, freq=3 * pd.offsets.BDay())
# com asfreq() dá pra reescalonar e aumentar ou diminuir dias da frequência
# ts.asfreq(pd.offsets.BDay()) ou ts.asfreq(pd.offsets.BDay(), method='pad')

# transforma de datetimeindex pra array de datetime com to_pydatetime(), mas não sei qual a vantagem ou diferenças
# resampling permite mudar por exemplo de 10 em 10 segundos para 5 mins
# and efficient functionality for performing resampling operations during frequency conversion (e.g., converting secondly
# data into 5-minutely data)
# ts.resample('5Min').sum()
# tem até funções específicas de finance
# ts.resample('5Min').ohlc()
# com resample tem a questão de qual label usar. label specifies whether the result is labeled with the beginning or the
# end of the interval. loffset performs a time adjustment on the output labels. Usar label (index) mais da esquerda ou direita?

# Faltou se aprofundar um pouco em resampling
# Faltou se aprofundar em Periods
# Faltou se aprofundar em Time zone handling

datetime.datetime(2013, 4, 30, 0, 0)

In [3]:
def org_columns(df):
    '''
    Tidy a fundamentals dataframe.

    Keeps only the tickers of interest, i.e. column labels that end in a
    non-digit followed by 3 or by 4 (e.g. ``ABCD3``, ``ABCD4``), with the
    "3" group placed before the "4" group. Keeps only the rows whose index
    label contains Mar, Jun, Sep or Dec, and names the column axis "ticker".

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string column labels (tickers) and string index labels
        such as "Mar-86". It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame restricted to the selected columns and rows.
    '''
    tickers = df.columns.str.extract('(.*)$')[0]
    # Work on a copy so relabelling the columns does not leak into the caller's frame.
    df = df.copy()
    df.columns = tickers
    # Raw strings: '\D' is an invalid escape sequence in a normal string literal.
    # 'on' / 'pn' are presumably the ordinary / preferred share classes (TODO confirm).
    on = tickers[tickers.str.contains(r'.\D3$')]
    pn = tickers[tickers.str.contains(r'.\D4$')]
    sto_tick = pd.concat([on, pn], ignore_index=True)
    df = df[sto_tick]
    # Keep only the rows labelled with a quarter-end month
    df = df[df.index.str.contains("Mar|Jun|Sep|Dec")]
    df.columns.name = "ticker"
    return df

In [4]:
# Goal: eliminate stocks whose monthly volume is below 3 million, which requires the monthly volume
# (average volume could be used instead).
# Monthly volume (in thousands)
_xl_opts = dict(skiprows=[1, 2], header=1, index_col=0, na_values='-')

vm_ati = pd.read_excel('../../data/economatica_1/volume_mensal_ativas.xlsx', **_xl_opts)
vm_can = pd.read_excel('../../data/economatica_1/volume_mensal_canceladas.xlsx', **_xl_opts)
# The "canceladas" file has its Jan-86 / Feb-86 rows removed before the two files are joined
vm_can = vm_can.drop(["Jan-86", "Feb-86"])

# Join both files side by side, then keep only the tickers/rows selected by org_columns
vm = org_columns(pd.concat([vm_ati, vm_can], axis=1, sort=False))

In [8]:
# Monthly closing prices: read the file, remove the Jan-86 / Feb-86 rows, then keep only the
# tickers/rows selected by org_columns
cp_mon = org_columns(
    pd.read_excel('../../data/economatica_0/close_monthly.xlsx',
                  skiprows=[1, 2], header=1, index_col=0, na_values='-')
    .drop(["Jan-86", "Feb-86"])
)

In [11]:
# Display the volume table produced by org_columns (column axis named "ticker")
vm

ticker,EALT3,ADHM3,TIET3,AFLT3,BRGE3,CRIV3,RPAD3,BRIV3,ALSO3,APTI3,...,VIVO4,VTEC4,VULC4,WEGE4,WMBY4,WET4,WISA4,ILMD4,ESTC4,ZIVI4
Mar-86,,,,,,,,2.021252e-07,,,...,,,2.174527e-06,,1.168935e-05,,,7.927801e-07,,
Jun-86,,,,,,,,1.298641e-07,,,...,,,5.368808e-07,0.000021,1.091755e-05,,,1.186296e-05,,
Sep-86,,,,,,,,7.900364e-08,,,...,,,3.141345e-07,0.000006,4.285257e-06,,,3.327951e-06,,
Dec-86,,,,,,,,1.099007e-06,,,...,,,2.069063e-08,0.000001,2.374025e-06,,,2.484352e-06,,
Mar-87,,,,,,,,7.365054e-08,,,...,,,4.002121e-08,0.000006,7.027429e-07,,,1.758215e-06,,
Jun-87,,,,,,,1.566810e-07,1.835705e-07,,,...,,,4.980876e-06,0.000022,9.286172e-06,,,4.225760e-06,,2.011705e-06
Sep-87,,,,,,,1.926200e-07,8.342663e-07,,,...,,,3.730049e-06,0.000017,4.081276e-06,,,1.074898e-05,,6.912728e-07
Dec-87,,,,,,,1.176149e-07,2.263374e-07,,,...,,,1.704742e-06,0.000007,2.625647e-06,,,2.576203e-06,,6.542720e-07
Mar-88,,,,,,,3.762676e-07,1.906176e-06,,,...,,,9.542665e-07,0.000115,1.795613e-05,,,7.956805e-06,,7.349243e-06
Jun-88,,,,,,,1.165342e-06,9.034684e-07,,,...,,9.765229e-07,1.186199e-05,0.000155,1.297281e-05,,,3.421331e-05,,2.632411e-05


In [9]:
# Display the closing-price table produced by org_columns
cp_mon

ticker,APPA3,ABYA3,AVIL3,ADHM3,AELP3,AESL3,GETI3,AETA3,AFLT3,BSGR3,...,VPSC4,VPTA4,VGOR4,VTEC4,WET4,MWET4,WHRL4,WLMM4,ILMD4,ZIVI4
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Mar-86,,,,,,,,,,,...,,,,,,,,4.056327e-09,8.749650e-08,
Jun-86,,,,,,,,,,,...,,,,,,,,2.761990e-09,1.698899e-07,
Sep-86,,,,,,,,,,,...,,,,,,,3.173499e-10,1.216898e-09,9.213262e-08,
Dec-86,,,,,,,,,,,...,,,,,,,2.167268e-10,8.112654e-10,7.841075e-08,
Mar-87,,,,,,,,,,,...,,,,,,,1.238439e-10,5.936988e-10,7.187651e-08,
Jun-87,,,,,,,,,,,...,,,1.945431e-10,,,3.216852e-07,3.096097e-10,1.438152e-09,1.128640e-07,6.165559e-09
Sep-87,,,,,,,,,,,...,,,2.804453e-10,,,3.666265e-07,5.275530e-10,1.253774e-09,1.551879e-07,9.248338e-09
Dec-87,,,,,,,,,,,...,,,2.273880e-10,,,1.892266e-07,6.234718e-10,1.307841e-09,9.522896e-08,5.240725e-09
Mar-88,,,,,,,,,,,...,,,1.288532e-09,,,1.123533e-06,1.480849e-09,3.966713e-09,1.798769e-07,2.157946e-08
Jun-88,,,3.971664e-08,,,,,,,,...,,,1.354662e-09,3.761756e-12,,2.351345e-06,1.742175e-09,9.291357e-09,5.264177e-07,4.501236e-08


In [5]:
# EBIT
_xl_opts = dict(skiprows=[1, 2], header=1, index_col=0, na_values='-')

ebit_ati = pd.read_excel('../../data/economatica_1/ebit_mensal__ativas.xlsx', **_xl_opts)
ebit_can = pd.read_excel('../../data/economatica_1/ebit_mensal_canceladas.xlsx', **_xl_opts)
# The "canceladas" file has its Jan-86 / Feb-86 rows removed before the two files are joined
ebit_can = ebit_can.drop(["Jan-86", "Feb-86"])

# Join both files side by side, then keep only the tickers/rows selected by org_columns
ebit = org_columns(pd.concat([ebit_ati, ebit_can], axis=1, sort=False))

In [161]:
# Net Working Capital
nwc_ati = pd.read_excel('../../data/economatica_1/working_capital_mensal_ativas.xlsx',
                            skiprows = [1,2],
                            header=1,
                            index_col = 0,
                            na_values='-')
nwc_can = pd.read_excel('../../data/economatica_1/working_capital_mensal_canceladas.xlsx',
                            skiprows = [1,2],
                            header=1,
                            index_col = 0,
                            na_values='-')
# The "canceladas" file has its Jan-86 / Feb-86 rows removed before the two files are joined
nwc_can.drop(["Jan-86", "Feb-86"], inplace=True)

# Fix: the original concatenated ebit_ati / ebit_can here (copy-paste slip), so `nwc` silently held
# EBIT and the working-capital files loaded above were never used.
nwc = pd.concat([nwc_ati,nwc_can],axis=1,sort=False)
nwc = org_columns(nwc)

In [162]:
# Total Assets
_xl_opts = dict(skiprows=[1, 2], header=1, index_col=0, na_values='-')

ta_ati = pd.read_excel('../../data/economatica_1/total_assets_mensal_ativas.xlsx', **_xl_opts)
ta_can = pd.read_excel('../../data/economatica_1/total_assets_mensal_canceladas.xlsx', **_xl_opts)
# The "canceladas" file has its Jan-86 / Feb-86 rows removed before the two files are joined
ta_can = ta_can.drop(["Jan-86", "Feb-86"])

# Join both files side by side, then keep only the tickers/rows selected by org_columns
ta = org_columns(pd.concat([ta_ati, ta_can], axis=1, sort=False))

In [163]:
# Current assets ("ativo circulante")
_xl_opts = dict(skiprows=[1, 2], header=1, index_col=0, na_values='-')

ac_ati = pd.read_excel('../../data/economatica_1/ativo_circulante_mensal_ativas.xlsx', **_xl_opts)
ac_can = pd.read_excel('../../data/economatica_1/ativo_circulante_mensal_canceladas.xlsx', **_xl_opts)
# The "canceladas" file has its Jan-86 / Feb-86 rows removed before the two files are joined
ac_can = ac_can.drop(["Jan-86", "Feb-86"])

# Join both files side by side, then keep only the tickers/rows selected by org_columns
ac = org_columns(pd.concat([ac_ati, ac_can], axis=1, sort=False))

In [164]:
# Current liabilities ("passivo circulante")
_xl_opts = dict(skiprows=[1, 2], header=1, index_col=0, na_values='-')

pc_ati = pd.read_excel('../../data/economatica_1/passivo_circulante_mensal_ativas.xlsx', **_xl_opts)
pc_can = pd.read_excel('../../data/economatica_1/passivo_circulante_mensal_canceladas.xlsx', **_xl_opts)
# The "canceladas" file has its Jan-86 / Feb-86 rows removed before the two files are joined
pc_can = pc_can.drop(["Jan-86", "Feb-86"])

# Join both files side by side, then keep only the tickers/rows selected by org_columns
pc = org_columns(pd.concat([pc_ati, pc_can], axis=1, sort=False))

In [165]:
# Total Current Assets
# NOTE(review): the value computed is ac - pc (current assets minus current liabilities) — confirm that
# the "Total Current Assets" label is what is intended.
# Author's note: tca lost nothing in this operation with respect to not using fill_value, compared with
# plain ac - pc.
# In these operations the result has the widest possible set of columns (the union), much like a concat.
# tca = ac - pc
tca = ac.sub(pc,fill_value=0)

In [166]:
# Goodwill
_xl_opts = dict(skiprows=[1, 2], header=1, index_col=0, na_values='-')

gw_ati = pd.read_excel('../../data/economatica_1/goodwill_mensal_ativas.xlsx', **_xl_opts)
gw_can = pd.read_excel('../../data/economatica_1/goodwill_mensal_canceladas.xlsx', **_xl_opts)
# NOTE(review): unlike the sibling loaders, no Jan-86 / Feb-86 rows are dropped here. The original kept
# the call commented out, and it targeted pc_can rather than gw_can — confirm whether a drop is needed:
# pc_can.drop(["Jan-86", "Feb-86"], inplace=True)

# Join both files side by side, then keep only the tickers/rows selected by org_columns
gw = org_columns(pd.concat([gw_ati, gw_can], axis=1, sort=False))

In [167]:
# Net Fixed Assets
# nfa = ta - tca - gw
# Evaluation order: tca is subtracted first, and only then gw.
# nfa2 = ta.sub(tca, fill_value=0).sub(gw, fill_value=0)
# nfa.equals(nfa2) # expected to be False, since the plain and fill_value versions should differ

# fill_value=0 treats a value missing on only one side as 0 instead of propagating NaN
nfa = ta.sub(tca, fill_value=0).sub(gw, fill_value=0)

In [168]:
# Market Value of Equity
_xl_opts = dict(skiprows=[1, 2], header=1, index_col=0, na_values='-')

mve_ati = pd.read_excel('../../data/economatica_1/valor_mercado_mensal_ativas.xlsx', **_xl_opts)
mve_can = pd.read_excel('../../data/economatica_1/valor_mercado_mensal_canceladas.xlsx', **_xl_opts)
# The "canceladas" file has its Jan-86 / Feb-86 rows removed before the two files are joined
mve_can = mve_can.drop(["Jan-86", "Feb-86"])

# Join both files side by side, then keep only the tickers/rows selected by org_columns
mve = org_columns(pd.concat([mve_ati, mve_can], axis=1, sort=False))

In [169]:
# Net Interest-Bearing Debt
_xl_opts = dict(skiprows=[1, 2], header=1, index_col=0, na_values='-')

nibd_ati = pd.read_excel('../../data/economatica_1/net_debt_mensal_ativas.xlsx', **_xl_opts)
nibd_can = pd.read_excel('../../data/economatica_1/net_debt_mensal_canceladas.xlsx', **_xl_opts)
# The "canceladas" file has its Jan-86 / Feb-86 rows removed before the two files are joined
nibd_can = nibd_can.drop(["Jan-86", "Feb-86"])

# Join both files side by side, then keep only the tickers/rows selected by org_columns
nibd = org_columns(pd.concat([nibd_ati, nibd_can], axis=1, sort=False))

In [170]:
# Enterprise Level (market value of equity plus net interest-bearing debt)
# Author's note: ev1 comes out quite different from ev2
# ev1 = mve + nibd
# ev2 = mve.add(nibd, fill_value=0)
# The fill_value version is kept: a value missing on only one side counts as 0 instead of giving NaN
ev = mve.add(nibd, fill_value=0)

In [171]:
# Return on Capital (ROC)
# roc = ebit / (nwc + nfa)
# NOTE(review): fill_value=0 on the division turns a missing EBIT with a known denominator into 0.0, and
# a missing denominator into a division by zero — confirm this is preferred over NaN.
roc = ebit.div(nwc.add(nfa,fill_value=0),fill_value=0)

In [172]:
# Earnings Yield: EBIT divided by the enterprise value computed above
# NOTE(review): with fill_value=0 a missing EBIT over a known ev yields 0.0 rather than NaN — presumably
# the source of the many 0.000000 entries in the displayed table; confirm.
ey = ebit.div(ev, fill_value=0)

In [173]:
# Display the earnings-yield table
ey

ticker,AALR3,ABCB3,ABCB4,ABEV3,ABYA3,ACES3,ACES4,ACGU3,ADHM3,AEDU3,...,WISA3,WISA4,WIZS3,WLMM3,WLMM4,WMBY3,WMBY4,WWOW3,YDUQ3,ZIVI4
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Mar-86,,,,,,,,,,,...,,,,,,,,,,
Jun-86,,,,,,,,,,,...,,,,,,,,,,
Sep-86,,,,,,,,,,,...,,,,,,,,,,
Dec-86,,,,0.346678,,,,,,,...,,,,0.253990,0.253990,0.000000e+00,0.000000e+00,,,
Mar-87,,,,0.000000,,,,,,,...,,,,0.000000,0.000000,0.000000e+00,0.000000e+00,,,
Jun-87,,,,0.000000,,,,,,,...,,,,0.000000,0.000000,0.000000e+00,0.000000e+00,,,0.000000
Sep-87,,,,0.000000,,,,,,,...,,,,0.000000,0.000000,0.000000e+00,0.000000e+00,,,0.000000
Dec-87,,,,-0.224333,,0.000000,0.000000,,,,...,,,,1.279382,1.279382,-3.377197e-03,-3.377197e-03,,,0.000000
Mar-88,,,,0.000000,,0.000000,0.000000,,,,...,,,,0.000000,0.000000,0.000000e+00,0.000000e+00,,,0.000000
Jun-88,,,,0.000000,,0.000000,0.000000,,,,...,,,,0.000000,0.000000,0.000000e+00,0.000000e+00,,,0.000000


In [174]:
# Take one row (one quarter) of each metric as a per-ticker Series.
# ey.loc["Mar-18"]
# NOTE(review): ey_ is taken at "Jun-19" but roc_ at "Mar-19" — confirm the two quarters are meant to differ.
ey_ = ey.loc["Jun-19"]
roc_ = roc.loc["Mar-19"]

In [175]:
# Rank the tickers by earnings yield for the selected quarter, highest first (NaN entries come last)
ey_.sort_values(ascending=False)

ticker
FIGE3    179.666667
FIGE4    179.666667
CALI3     54.563107
CALI4     54.563107
CMSA4      1.098619
CMSA3      1.098619
MNPR3      0.624504
STTR3      0.584374
CEGR3      0.462138
CORR3      0.453794
CORR4      0.453794
CEAB3      0.435736
STKF3      0.311146
VIVA3      0.291689
RSUL4      0.265535
RSUL3      0.265535
GPAR3      0.238085
INNT3      0.228256
ODER3      0.194433
ODER4      0.194433
MNPR4      0.184226
PTNT3      0.172129
PTNT4      0.172129
AGRO3      0.171524
CNSY3      0.165935
CEBR3      0.156307
QUSW3      0.145402
SMFT3      0.144353
NUTR3      0.144056
CASN4      0.140790
            ...    
UOLL3           NaN
UOLL4           NaN
VAGV3           NaN
VAGV4           NaN
VASP3           NaN
VCPA4           NaN
VEMG4           NaN
VGOR3           NaN
VGOR4           NaN
VIGR3           NaN
VINE3           NaN
VIVO3           NaN
VIVO4           NaN
VPSC3           NaN
VPSC4           NaN
VPTA3           NaN
VPTA4           NaN
VRLM4           NaN
VSMA3        

In [47]:
# Build small example frames from dictionaries plus an explicit index, to experiment with arithmetic
# and fill_value. df3 has NaN holes; df4 shares no column with the others.
row_labels = [0, 1, 2, 3]
nan = np.nan

df1 = pd.DataFrame(dict(A=[12, 44, 17, 1],
                        B=[4, 8, 100, -3],
                        C=[47, 68, 52, 1],
                        D=[0, 0, 0, 0]),
                   index=row_labels)
df2 = pd.DataFrame(dict(A=[1, 2, 3, 4],
                        B=[-10, 4, 5, 13],
                        C=[12, 23, 10, 100],
                        D=[1, 1, 1, 1]),
                   index=row_labels)
df3 = pd.DataFrame(dict(A=[1, 2, 3, 4],
                        B=[-10, nan, 5, 13],
                        C=[nan, 23, 10, 100],
                        D=[1, 1, 1, 1]),
                   index=row_labels)
df4 = pd.DataFrame(dict(E=[10, 10, 10, 10],
                        F=[10, 10, 10, 10]),
                   index=row_labels)

In [7]:
# Plain +: a cell that is NaN in either operand stays NaN in the result (see columns B and C in the output)
df1+df3

Unnamed: 0,A,B,C,D
0,13,-6.0,,1
1,46,,91.0,1
2,20,105.0,62.0,1
3,5,10.0,101.0,1


In [39]:
# add() with fill_value=0: a NaN on one side is treated as 0
print(df1.add(df3, fill_value=0))
# plain +: the NaN propagates
print(df1 + df3)
# note that fill_value applies to NaNs in the first frame as well as in the second
print(df3.add(df1, fill_value=0))

    A      B      C  D
0  13   -6.0   47.0  1
1  46    8.0   91.0  1
2  20  105.0   62.0  1
3   5   10.0  101.0  1
    A      B      C  D
0  13   -6.0    NaN  1
1  46    NaN   91.0  1
2  20  105.0   62.0  1
3   5   10.0  101.0  1
    A      B      C  D
0  13   -6.0   47.0  1
1  46    8.0   91.0  1
2  20  105.0   62.0  1
3   5   10.0  101.0  1


In [49]:
# The result has the union of the columns, but since no column is shared everything comes out NaN
# (this intermediate result is not displayed; only the last expression of the cell is)
df1+df4
# The result has the union of the columns and each column keeps its own values, since no column is shared
df1.add(df4, fill_value=0)

Unnamed: 0,A,B,C,D,E,F
0,12.0,4.0,47.0,0.0,10.0,10.0
1,44.0,8.0,68.0,0.0,10.0,10.0
2,17.0,100.0,52.0,0.0,10.0,10.0
3,1.0,-3.0,1.0,0.0,10.0,10.0


In [54]:
# Chained plain +: the NaNs of df3 propagate into the sum (see output)
df1+df2+df3

Unnamed: 0,A,B,C,D
0,14,-16.0,,2
1,48,,114.0,2
2,23,110.0,72.0,2
3,9,23.0,201.0,2


In [59]:
# Method form without fill_value behaves like the plain operators: NaNs from df3 propagate (see output)
df1.add(df2).sub(df3)

Unnamed: 0,A,B,C,D
0,12,4.0,,0
1,44,,68.0,0
2,17,100.0,52.0,0
3,1,-3.0,1.0,0


In [88]:
A = pd.DataFrame({'A':[10]})
B = pd.DataFrame({'A':[1]})
C = pd.DataFrame({'A':[5]})