# Bibliotecas

In [1]:
import numpy as np
import pandas as pd

# Preparação dos Dados

In [2]:
# Data source: https://br.investing.com/indices/bovespa-historical-data

# Load the CSV with every column as a string so the number formatting
# ('.' as thousands separator, ',' as decimal mark) can be cleaned below
data = pd.read_csv('dados/ibovespa-201907-202406.csv', dtype=str)

# Rename the columns (names are assigned by position)
data.columns = ['date', 'closing', 'opening', 'max', 'min',
                'volume', 'variation']

# Parse the date column (day.month.year) into a datetime dtype
data['date'] = pd.to_datetime(data['date'], format='%d.%m.%Y')

# Strip the percent sign and normalise the separators before casting to float.
# regex=False forces literal replacements: interpreted as a regular
# expression, '.' would match any character.
for var in ['closing', 'opening', 'max', 'min', 'variation']:
    data[var] = (
        data[var]
        .str.replace('%', '', regex=False)
        .str.replace('.', '', regex=False)
        .str.replace(',', '.', regex=False)
        .astype(float)
    )

# Ajustando a coluna 'volume'
def convert_volume(volume):
    """Convert a formatted volume string into a plain float.

    The text uses ',' as the decimal mark and may end with a magnitude
    suffix: 'K' (thousand), 'M' (million) or 'B' (billion). When a ','
    is present, any '.' is treated as a thousands separator and removed.

    Args:
        volume: Text such as '9,07M' or '424,32K'. ``None`` and
            non-string values (e.g. the float NaN pandas uses for a
            missing cell) are accepted as well.

    Returns:
        The volume as a float; NaN when the input is ``None`` or NaN.

    Raises:
        ValueError: If the numeric part of the text cannot be parsed.
    """
    if volume is None:
        return float('nan')
    if not isinstance(volume, str):
        # Already numeric (including NaN for missing cells): pass through
        return float(volume)

    multipliers = {'K': 1e3, 'M': 1e6, 'B': 1e9}
    text = volume.strip()
    multiplier = 1.0
    if text and text[-1].upper() in multipliers:
        multiplier = multipliers[text[-1].upper()]
        text = text[:-1]

    if ',' in text:
        # With a decimal comma present, dots can only be thousands separators
        text = text.replace('.', '')

    return float(text.replace(',', '.')) * multiplier

# Convert each formatted volume string to its float value
data['volume'] = data['volume'].map(convert_volume)

# Descrição dos Dados

**Variáveis:**

|Variável|Descrição|
|-|-|
|date|Data do Pregão|
|closing|Valor no Fechamento|
|opening|Valor na Abertura|
|max|Valor Máximo|
|min|Valor Mínimo|
|volume|Volume Negociado|
|variation|Variação percentual em relação ao fechamento do dia anterior|

In [3]:
# Summarize column dtypes, non-null counts and memory usage
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1244 entries, 0 to 1243
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   date       1244 non-null   datetime64[ns]
 1   closing    1244 non-null   float64       
 2   opening    1244 non-null   float64       
 3   max        1244 non-null   float64       
 4   min        1244 non-null   float64       
 5   volume     1244 non-null   float64       
 6   variation  1244 non-null   float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 68.2 KB


In [4]:
# Preview the first rows of the DataFrame
data.head()

Unnamed: 0,date,closing,opening,max,min,volume,variation
0,2024-06-28,123907.0,124308.0,124500.0,123298.0,9070000.0,-0.32
1,2024-06-27,124308.0,122642.0,124308.0,122642.0,8710000.0,1.36
2,2024-06-26,122641.0,122331.0,122701.0,121402.0,8180000.0,0.25
3,2024-06-25,122331.0,122634.0,122849.0,121997.0,6940000.0,-0.25
4,2024-06-24,122637.0,121343.0,122840.0,121307.0,7840000.0,1.07


In [5]:
# Descriptive statistics restricted to the numeric columns
data.describe(include=np.number)

Unnamed: 0,closing,opening,max,min,volume,variation
count,1244.0,1244.0,1244.0,1244.0,1244.0,1244.0
mean,111065.645498,111047.559486,112055.311897,110048.052251,10442000.0,0.029928
std,11744.020943,11741.63379,11490.031702,11955.624684,3415931.0,1.632809
min,63570.0,63604.0,67604.0,61691.0,424320.0,-14.78
25%,103948.5,103940.0,104820.75,103085.0,8400000.0,-0.74
50%,111892.0,111882.0,112954.0,110730.5,10395000.0,0.055
75%,118883.5,118865.75,119627.75,117938.5,12485000.0,0.8525
max,134194.0,134194.0,134392.0,133832.0,26030000.0,13.91
