# ++ Carregar dados históricos da Bovespa em Dataframe para análise ++

In [1]:
import time
time.asctime( time.localtime(time.time()) )

'Fri Nov 18 18:13:02 2022'

In [2]:
import sys
from pandas import Series
import pandas as pd
from datetime import date
from zipfile import ZipFile
pd.__version__

'1.3.4'

In [3]:
import matplotlib as mat
mat.__version__

'3.4.3'

In [4]:
import matplotlib.pyplot as plt
%matplotlib notebook

In [5]:
from IPython.display import FileLink, FileLinks
FileLink('Manipulação CSV BB.ipynb')

In [6]:
# Setando opção do Pandas para mostrar todas as linhas
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [7]:
# Setando separador de milhar
pd.options.display.float_format = '{:,.2f}'.format

In [8]:
LISTA_ARQUIVOS = ['A2022']

In [9]:
def leituraArquivos(periodo):
    arq_zip = 'arquivos/COTAHIST_' + periodo + '.ZIP'
    arq_txt = 'COTAHIST_' + periodo + '.TXT'

    DTEXCH, CODNEG, PREABE, PREMAX, PREMIN, PREULT, VOLTOT = ([] for i in range(7)) 
    
    with ZipFile(arq_zip) as myzip:
        with myzip.open(arq_txt) as myfile:
            for line in myfile:
                if (line.decode('utf-8')[0:2] == '01') and (line.decode('utf-8')[10:12] == '02'):
                    DTEXCH.append(line.decode('utf-8')[2:10])
                    CODNEG.append(line.decode('utf-8')[12:24].rstrip())
                    PREABE.append(int(line.decode('utf-8')[56:69]) / 100)
                    PREMAX.append(int(line.decode('utf-8')[69:82]) / 100)
                    PREMIN.append(int(line.decode('utf-8')[82:95]) / 100)
                    PREULT.append(int(line.decode('utf-8')[108:121]) / 100)
                    VOLTOT.append(int(line.decode('utf-8')[170:188]) / 100)

    df_origem = pd.DataFrame(
        {"cdAcao":CODNEG
        ,"dtPregao":pd.to_datetime(DTEXCH, format="%Y%m%d", errors="ignore")
        ,"vrFech":PREULT
        ,"vrVolume":VOLTOT
        ,"vrMaxDia":PREMAX
        ,"vrMinDia":PREMIN
        ,"vrAbert":PREABE
        }
    ).sort_values(["cdAcao", "dtPregao"], ascending=True)

    return(df_origem)

In [10]:
def carrega_dados(arquivos):
    
    df = leituraArquivos(arquivos[0])
    
    for i in range(1, len(arquivos)):
        df = pd.concat([df, leituraArquivos(arquivos[i])])

#    df = pd.concat([df_112021, df_122021, df])

    df["pcVar"], df["pcMaxDia"], df["pcMinDia"], df["pcAbert"] = [
         ((df.vrFech   / df.vrFech.shift(1)) -1) *100
        ,((df.vrMaxDia / df.vrFech.shift(1)) -1) *100
        ,((df.vrMinDia / df.vrFech.shift(1)) -1) *100
        ,((df.vrAbert  / df.vrFech.shift(1)) -1) *100
        ]
    
    df["ic05"], df["ic10"], df["ic15"], df["ic20"], df["ic25"], df["ic30"] = [
         df.apply(condicao05, axis=1)
        ,df.apply(condicao10, axis=1)
        ,df.apply(condicao15, axis=1)
        ,df.apply(condicao20, axis=1)
        ,df.apply(condicao25, axis=1)
        ,df.apply(condicao30, axis=1)
        ]

    return df

In [11]:
def condicao05(df_tmp):
    return 1 if (df_tmp["pcMaxDia"] > 0.5) else 0
def condicao10(df_tmp):
    return 1 if (df_tmp["pcMaxDia"] > 1) else 0
def condicao15(df_tmp):
    return 1 if (df_tmp["pcMaxDia"] > 1.5) else 0
def condicao20(df_tmp):
    return 1 if (df_tmp["pcMaxDia"] > 2) else 0
def condicao25(df_tmp):
    return 1 if (df_tmp["pcMaxDia"] > 2.5) else 0
def condicao30(df_tmp):
    return 1 if (df_tmp["pcMaxDia"] > 3) else 0

In [12]:
def buscaPeriodos(df, qt_dias):
    return df.loc[df["dtPregao"]>=(df.dtPregao.drop_duplicates().sort_values(ascending=False).iloc[qt_dias])].sort_values(["cdAcao", "dtPregao"], ascending=False)

In [13]:
def somatorioPcMaxDia(df_ent, pc, index_name):
    return df_ent.groupby("cdAcao")["pcMaxDia"].apply(lambda x: (x>pc).sum()).reset_index(name=index_name)

In [14]:
def buscaMedia(df_ent, coluna, index_name):
    return df_ent.groupby("cdAcao")[coluna].agg("mean").reset_index(name=index_name)

In [15]:
def montaDfPeriodos(df_origem, qt_dias):
    df_dias  = buscaPeriodos(df_origem, qt_dias-1)
    
    df05 = somatorioPcMaxDia(df_dias, 0.5, "0.5%")
    df10 = somatorioPcMaxDia(df_dias, 1.0, "resultado")
    df15 = somatorioPcMaxDia(df_dias, 1.5, "resultado")
    df20 = somatorioPcMaxDia(df_dias, 2.0, "resultado")
    df25 = somatorioPcMaxDia(df_dias, 2.5, "resultado")
    df30 = somatorioPcMaxDia(df_dias, 3.0, "resultado")
    dfVol     = buscaMedia(df_dias, "vrVolume", "vol")
    dfVrFech  = buscaMedia(df_dias, "vrFech", "vrFech")
    dfPcAbert = buscaMedia(df_dias, "pcAbert", "pcAbert")
    
    df05["1.0%"], df05["1.5%"], df05["2.0%"], df05["2.5%"], df05["3.0%"] , df05["AvgVol"], df05["AvgVrFech"], df05["AvgPcAbert"] = [
    df10["resultado"],df15["resultado"],df20["resultado"],df25["resultado"],df30["resultado"],dfVol["vol"],dfVrFech["vrFech"],dfPcAbert["pcAbert"]]

    df_result = df05.reset_index(drop=True).sort_values(["1.0%", "1.5%", "2.0%", "2.5%", "3.0%"], 
                                                        ascending=False)

    return df_result

In [16]:
def montaTabela(df_n_dias, vol, col_pc, pc_min, avg_vr_fech):
    return df_n_dias.loc[(df_n_dias["AvgVol"] > vol) & (df_n_dias[col_pc] >= pc_min) & (df_n_dias["AvgVrFech"] > avg_vr_fech)]

In [17]:
def consultaAcao(df, cd_acao):
    return df.loc[(df["cdAcao"] == cd_acao)].replace(0, "").sort_values(["dtPregao"], ascending=False)

In [18]:
df = carrega_dados(LISTA_ARQUIVOS)

### 25 dias, order by 1%

In [19]:
montaTabela(montaDfPeriodos(df, 25), 6000000, '1.0%', 20, 5)

Unnamed: 0,cdAcao,0.5%,1.0%,1.5%,2.0%,2.5%,3.0%,AvgVol,AvgVrFech,AvgPcAbert
241,KLBN11,24,21,14,12,9,7,151108886.44,21.48,-0.1
202,GFSA3,22,20,19,16,15,13,13921242.88,7.55,0.0
168,EMBR3,20,20,17,13,11,8,114103613.96,13.0,-0.27
342,ROMI3,22,20,17,13,11,7,11701419.6,15.94,-0.11
325,PTBL3,20,20,17,13,10,8,16481374.52,11.87,-0.46
169,ENAT3,20,20,17,10,8,6,23982838.68,15.2,-0.54
142,CVCB3,21,20,16,14,12,10,100972583.72,6.59,-0.22
377,SUZB3,25,20,15,14,9,6,376388667.12,52.97,0.39
20,ANIM3,21,20,15,13,11,8,29838634.72,6.13,0.03


### 20 dias, order by 1%

In [20]:
montaTabela(montaDfPeriodos(df, 20), 6000000, '1.0%', 16, 5)

Unnamed: 0,cdAcao,0.5%,1.0%,1.5%,2.0%,2.5%,3.0%,AvgVol,AvgVrFech,AvgPcAbert
338,ROMI3,18,17,15,11,10,7,13165992.5,16.01,-0.1
165,ENAT3,17,17,14,9,7,5,25356977.85,15.17,-0.62
198,GFSA3,18,16,15,13,13,11,14940205.85,7.51,-0.08
422,YDUQ3,17,16,15,12,11,8,128217292.8,13.76,0.05
324,QUAL3,17,16,14,12,9,9,40647566.15,7.28,-0.4
46,BLAU3,17,16,14,9,8,6,12361474.8,31.73,0.22
398,UGPA3,18,16,13,13,7,7,121907425.65,13.37,-0.45
164,EMBR3,16,16,13,10,8,6,119211444.05,13.26,-0.42
139,CVCB3,17,16,12,11,9,7,104341414.45,6.56,-0.4
373,SUZB3,20,16,12,11,7,5,417978836.2,54.1,0.42


### 15 dias, order by 1%

In [21]:
montaTabela(montaDfPeriodos(df, 15), 6000000, '1.5%', 12, 5)

Unnamed: 0,cdAcao,0.5%,1.0%,1.5%,2.0%,2.5%,3.0%,AvgVol,AvgVrFech,AvgPcAbert
197,GFSA3,13,13,12,10,10,9,15904895.13,7.59,-0.24
332,ROMI3,13,13,12,9,9,6,13533994.33,16.05,-0.12
138,CVCB3,12,12,12,11,9,7,112025684.67,6.47,-0.35
318,QUAL3,12,12,12,11,8,8,41285787.4,7.3,-0.44
415,YDUQ3,13,12,12,9,9,6,141911585.4,14.14,0.22


### 10 dias, order by 1%

In [22]:
montaTabela(montaDfPeriodos(df, 10), 6000000, '1.0%', 9, 5)

Unnamed: 0,cdAcao,0.5%,1.0%,1.5%,2.0%,2.5%,3.0%,AvgVol,AvgVrFech,AvgPcAbert
359,SUZB3,10,10,8,7,5,3,419114647.1,56.29,0.29
383,UGPA3,9,9,7,7,2,2,138084024.3,13.61,-0.62
407,ZAMP3,9,9,6,6,5,3,11662529.5,6.73,-0.65
229,KLBN11,10,9,6,5,4,3,152398400.5,22.68,-0.33


### 5 dias, order by 1%

In [23]:
montaTabela(montaDfPeriodos(df, 5), 6000000, '1.0%', 5, 5)

Unnamed: 0,cdAcao,0.5%,1.0%,1.5%,2.0%,2.5%,3.0%,AvgVol,AvgVrFech,AvgPcAbert
349,SUZB3,5,5,4,4,3,2,566764533.0,57.78,0.13
379,VALE3,5,5,4,3,3,2,3788972391.6,81.14,0.12
37,BBDC4,5,5,4,3,1,1,1691471010.4,15.19,0.64
36,BBDC3,5,5,3,1,1,0,196288005.6,13.03,0.46


### Consulta ação específica

In [24]:
consultaAcao(df, "SUZB3")

Unnamed: 0,cdAcao,dtPregao,vrFech,vrVolume,vrMaxDia,vrMinDia,vrAbert,pcVar,pcMaxDia,pcMinDia,pcAbert,ic05,ic10,ic15,ic20,ic25,ic30
172323,SUZB3,2022-11-17,57.94,493182265.0,59.88,57.53,58.46,-1.13,2.18,-1.83,-0.24,1.0,1.0,1.0,1.0,,
171964,SUZB3,2022-11-16,58.6,753985939.0,59.24,57.51,57.73,1.49,2.6,-0.4,-0.02,1.0,1.0,1.0,1.0,1.0,
171592,SUZB3,2022-11-14,57.74,328863051.0,58.98,56.96,58.98,-0.88,1.25,-2.21,1.25,1.0,1.0,,,,
171221,SUZB3,2022-11-11,58.25,608171092.0,59.21,56.26,56.5,3.32,5.02,-0.21,0.21,1.0,1.0,1.0,1.0,1.0,1.0
170856,SUZB3,2022-11-10,56.38,649620318.0,56.79,54.81,54.81,2.29,3.03,-0.56,-0.56,1.0,1.0,1.0,1.0,1.0,1.0
170492,SUZB3,2022-11-09,55.12,220232558.0,56.26,54.74,55.28,-0.6,1.46,-1.28,-0.31,1.0,1.0,,,,
170126,SUZB3,2022-11-08,55.45,302031880.0,56.38,54.9,55.12,0.85,2.55,-0.15,0.25,1.0,1.0,1.0,1.0,1.0,
169767,SUZB3,2022-11-07,54.98,381223751.0,55.32,53.62,54.49,0.92,1.54,-1.58,0.02,1.0,1.0,1.0,,,
169404,SUZB3,2022-11-04,54.48,224792859.0,55.57,53.9,55.0,1.04,3.06,-0.04,2.0,1.0,1.0,1.0,1.0,1.0,1.0
169030,SUZB3,2022-11-03,53.92,229042758.0,54.0,51.72,53.0,1.99,2.14,-2.18,0.25,1.0,1.0,1.0,1.0,,


In [25]:
# dfDtMin = df5d.loc[(df["dtPregao"] == dt_min_5d) & (df["cdAcao"] == 'PTBL3')]

df25d = buscaPeriodos(df, 24)
df5d = buscaPeriodos(df, 4)

dt_min_5d  = df5d["dtPregao"].min()
dt_min_25d = df25d["dtPregao"].min()
dt_max     = df5d["dtPregao"].max()
dfDtMin5d  = df5d.loc[(df5d["dtPregao"] == dt_min_5d)].set_index(["cdAcao"])
dfDtMin25d = df25d.loc[(df25d["dtPregao"] == dt_min_25d)].set_index(["cdAcao"])
dfDtMax    = df5d.loc[(df5d["dtPregao"] == dt_max)].set_index(["cdAcao"])

In [26]:
 dfPc5d = pd.DataFrame({
         "dtInicio":dfDtMin5d["dtPregao"] ,"dtFim":dfDtMax["dtPregao"]
        ,"vrInicio":dfDtMin5d["vrFech"]   ,"vrFim":dfDtMax["vrFech"]
        ,"pcPeriodo":((dfDtMax["vrFech"] - dfDtMin5d["vrFech"]) / dfDtMin5d["vrFech"]) * 100
    })

In [27]:
dfPc5d.sort_values(["pcPeriodo"], ascending=False)

Unnamed: 0_level_0,dtInicio,dtFim,vrInicio,vrFim,pcPeriodo
cdAcao,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CTSA4,2022-11-10,2022-11-17,0.62,2.21,256.45
CTSA3,2022-11-10,2022-11-17,1.39,4.18,200.72
LEVE3,2022-11-10,2022-11-17,25.36,29.59,16.68
BRAP3,2022-11-10,2022-11-17,22.11,24.9,12.62
BRAP4,2022-11-10,2022-11-17,24.57,27.48,11.84
VALE3,2022-11-10,2022-11-17,74.55,83.1,11.47
CSNA3,2022-11-10,2022-11-17,13.21,14.61,10.6
MYPK3,2022-11-10,2022-11-17,12.37,13.63,10.19
OSXB3,2022-11-10,2022-11-17,6.09,6.69,9.85
BRIV4,2022-11-10,2022-11-17,7.28,7.98,9.62


In [28]:
 dfPc25d = pd.DataFrame({
         "dtInicio":dfDtMin25d["dtPregao"] ,"dtFim":dfDtMax["dtPregao"]
        ,"vrInicio":dfDtMin25d["vrFech"]   ,"vrFim":dfDtMax["vrFech"]
        ,"pcPeriodo":((dfDtMax["vrFech"] - dfDtMin25d["vrFech"]) / dfDtMin25d["vrFech"]) * 100
    })

In [29]:
dfPc25d.sort_values(["pcPeriodo"], ascending=False)

Unnamed: 0_level_0,dtInicio,dtFim,vrInicio,vrFim,pcPeriodo
cdAcao,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CTSA4,2022-10-11,2022-11-17,0.68,2.21,225.0
CTSA3,2022-10-11,2022-11-17,1.4,4.18,198.57
LEVE3,2022-10-11,2022-11-17,23.97,29.59,23.45
MNDL3,2022-10-11,2022-11-17,30.0,36.99,23.3
GGBR3,2022-10-11,2022-11-17,21.35,26.09,22.2
VIVR3,2022-10-11,2022-11-17,0.5,0.61,22.0
SUZB3,2022-10-11,2022-11-17,47.63,57.94,21.65
OSXB3,2022-10-11,2022-11-17,5.67,6.69,17.99
GGBR4,2022-10-11,2022-11-17,25.86,30.36,17.4
WEGE3,2022-10-11,2022-11-17,33.9,39.55,16.67


In [30]:
### 25 dias, Order by 0.5% # df05.loc[(df05["AvgVol"] > 6000000) & (df05["0.5%"] >= 23) & (df05["AvgVrFech"] > 5)].sort_values(["0.5%", "1.0%", "1.5%", "2.0%"], ascending=False)
### 20 dias, Order by 0.5% # df05_20d.loc[(df05_20d["AvgVol"] > 6000000) & (df05_20d["0.5%"] >= 19) & (df05_20d["AvgVrFech"] > 5)].sort_values(["0.5%", "1.0%", "1.5%", "2.0%"], ascending=False)
### 15 dias, Order by 0.5% # df05_15d.loc[(df05_15d["AvgVol"] > 6000000) & (df05_15d["0.5%"] >= 15) & (df05_15d["AvgVrFech"] > 5)].sort_values(["0.5%", "1.0%", "1.5%", "2.0%"], ascending=False)

## Definindo valores para compra

In [31]:
# Valor disponível para cmp / 1000
# Valor da ação no leilão
vr_corretagem = 9.8
emolumentos = 0.000325
acao = 'LIGT3'
vr_disp = 110
vr_acao_leilao = 1148 / 100
vr_qtd = round((vr_disp * 1000) / (vr_acao_leilao * 100)) * 100

In [32]:
vr_cmp = round(vr_acao_leilao * 1.014, 2)

vr_tot_cmp = vr_acao_leilao * vr_qtd
corret_cmp = vr_corretagem + (vr_tot_cmp * emolumentos)
vr_tot_liq_cmp = vr_tot_cmp + corret_cmp

vr_acao_vnd_10 = float('{0:.2f}'.format(vr_acao_leilao * 1.0108 + 0.01))
vr_tot_vnd_10 = vr_acao_vnd_10 * vr_qtd
corret_vnd_10 = vr_corretagem + (vr_tot_vnd_10 * emolumentos)
vr_tot_liq_vnd_10 = vr_tot_vnd_10 - corret_vnd_10

vr_acao_vnd_15 = float('{0:.2f}'.format(vr_acao_leilao * 1.0158 + 0.01))
vr_tot_vnd_15 = vr_acao_vnd_15 * vr_qtd
corret_vnd_15 = vr_corretagem + (vr_tot_vnd_15 * emolumentos)
vr_tot_liq_vnd_15 = vr_tot_vnd_15 - corret_vnd_15

In [33]:
d = {'Compra':     ['Quantidade', 'Valor Leilão', 'Total', 'Total líquido', 'Corretagem Compra'],
     ' ':          [vr_qtd, vr_acao_leilao, vr_tot_cmp, vr_tot_liq_cmp, corret_cmp],
     'Venda 1,0%': ['Valor compra', 'Valor', 'Total', 'Total líquido', 'Corretagem Venda'],
     '1,0%':       [vr_cmp, vr_acao_vnd_10, vr_tot_vnd_10, vr_tot_liq_vnd_10, corret_vnd_10],
     'Venda 1,5%': [' ', 'Valor', 'Total', 'Total líquido', 'Corretagem Venda'],
     '1,5%':       [' ', vr_acao_vnd_15, vr_tot_vnd_15, vr_tot_liq_vnd_15, corret_vnd_15]
    }
df_calc = pd.DataFrame(data=d)
df_calc

Unnamed: 0,Compra,Unnamed: 2,"Venda 1,0%","1,0%","Venda 1,5%","1,5%"
0,Quantidade,9600.0,Valor compra,11.64,,
1,Valor Leilão,11.48,Valor,11.61,Valor,11.67
2,Total,110208.0,Total,111456.0,Total,112032.0
3,Total líquido,110253.62,Total líquido,111409.98,Total líquido,111985.79
4,Corretagem Compra,45.62,Corretagem Venda,46.02,Corretagem Venda,46.21


In [34]:
d2 = {'Index':      ['Quantidade', 'Valor', 'Total', 'Total líquido', 'Corretagem'],
      'Compra':     [vr_qtd, vr_cmp, vr_tot_cmp, vr_tot_liq_cmp, corret_cmp],
      'Venda 1.0%': [vr_qtd, vr_acao_vnd_10, vr_tot_vnd_10, vr_tot_liq_vnd_10, corret_vnd_10],
      'Venda 1.5%': [vr_qtd, vr_acao_vnd_15, vr_tot_vnd_15, vr_tot_liq_vnd_15, corret_vnd_15]
     }
df_calc2 = pd.DataFrame(data=d2)
df_calc2.set_index('Index')
df_calc2

Unnamed: 0,Index,Compra,Venda 1.0%,Venda 1.5%
0,Quantidade,9600.0,9600.0,9600.0
1,Valor,11.64,11.61,11.67
2,Total,110208.0,111456.0,112032.0
3,Total líquido,110253.62,111409.98,111985.79
4,Corretagem,45.62,46.02,46.21


In [35]:
time.asctime( time.localtime(time.time()) )

'Fri Nov 18 18:13:32 2022'