In [24]:
import time
from pathlib import Path

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [25]:
# Start a Chrome session. ChromeDriverManager().install() supplies the path of
# the chromedriver executable that the Selenium Service wrapper launches.
chromedriver_path = ChromeDriverManager().install()
chrome_service = Service(chromedriver_path)
driver = webdriver.Chrome(service=chrome_service)

In [26]:
# B3 page for the IBOV index. The `language=pt-br` query parameter presumably
# selects the Portuguese version of the page -- the scraping cell matches
# Portuguese text, so keep it in sync if this is changed.
url = 'https://sistemaswebb3-listados.b3.com.br/indexPage/day/IBOV?language=pt-br'

# Navigate the browser session held in `driver` to that page.
driver.get(url)

In [27]:
# Block for up to 10 seconds until the element with id "divContainerIframeB3"
# is present in the DOM, then keep a handle to it for the scraping steps.
container_locator = (By.ID, "divContainerIframeB3")
container_wait = WebDriverWait(driver, 10)
container = container_wait.until(
    EC.presence_of_element_located(container_locator)
)
print("Found the container:", container)


Found the container: <selenium.webdriver.remote.webelement.WebElement (session="50c2ec7c9a768e4b49aaf56bf01cd6aa", element="f.25E37A1C818B666E452D2D495BF2FDCD.d.3A49BFE639AE2FA9E295F4A0A7751F63.e.12")>


In [28]:
# Extract the date from the container
date_element = container.find_element(By.XPATH, "//p[contains(text(), 'Carteira Teórica do IBovespa válida para')]")
date_text = date_element.text.split("para")[-1].strip()  # Extracts '17/03/25'

# Format the date to make it file-system friendly
formatted_date = date_text.replace("/", "-")  # Replace slashes with dashes for file naming

# Locate the table within the container
table = container.find_element(By.TAG_NAME, "table")
rows = table.find_elements(By.TAG_NAME, "tr")

# Extract header
headers = [header.text for header in rows[0].find_elements(By.TAG_NAME, "th")]

# Extract rows
data = []
for row in rows[1:]:
    cols = row.find_elements(By.TAG_NAME, "td")
    data.append([col.text for col in cols])

# Convert to Pandas DataFrame
df = pd.DataFrame(data, columns=headers)

# Add the extracted date to the DataFrame as a new column
df["Date"] = date_text

# Save the DataFrame to a Parquet file with the date in the name
parquet_file_name = f"ibovespa_{formatted_date}.parquet"
df.to_parquet(f'data\\{parquet_file_name}', engine="pyarrow", index=False)

csv_file_name = parquet_file_name.replace(".parquet", ".csv")  # Replace the extension
df.to_csv(f'data\\{csv_file_name}', index=False, encoding="utf-8")

print(f"DataFrame with date column saved as {parquet_file_name}")


DataFrame with date column saved as ibovespa_17-03-25.parquet


In [29]:
# Preview the first rows of the scraped table; as the cell's last expression,
# the result is rendered as the cell output.
df.head()

Unnamed: 0,Código,Ação,Tipo,Qtde. Teórica,Part. (%),Date
0,ALOS3,ALLOS,ON NM,476.976.044,439,17/03/25
1,ABEV3,AMBEV S/A,ON ED,4.394.835.131,2867,17/03/25
2,ASAI3,ASSAI,ON NM,1.345.832.968,491,17/03/25
3,AURE3,AUREN,ON NM,323.738.747,119,17/03/25
4,AMOB3,AUTOMOB,ON NM,533.959.816,7,17/03/25
