In [1]:
import importlib
import os
import zipfile as zf
import duckdb
import pandas as pd
import numpy as np
import pyarrow as pa
import cvm

importlib.reload(cvm)
pd.set_option("max_colwidth", 20)

In [5]:
files_names = sorted(os.listdir('./data/raw/'))
files_names

['dfp_cia_aberta_2010.zip',
 'dfp_cia_aberta_2011.zip',
 'dfp_cia_aberta_2012.zip',
 'dfp_cia_aberta_2013.zip',
 'dfp_cia_aberta_2014.zip',
 'dfp_cia_aberta_2015.zip',
 'dfp_cia_aberta_2016.zip',
 'dfp_cia_aberta_2017.zip',
 'dfp_cia_aberta_2018.zip',
 'dfp_cia_aberta_2019.zip',
 'dfp_cia_aberta_2020.zip',
 'dfp_cia_aberta_2021.zip',
 'itr_cia_aberta_2020.zip',
 'itr_cia_aberta_2021.zip',
 'itr_cia_aberta_2022.zip']

In [6]:
def df_format(df) -> pd.DataFrame:
    df.VERSAO = df.VERSAO.astype(np.int8)  # unique -> ['3', '2', '4', '1', '7', '5', '6', '9', '8']
    df.CD_CVM = df.CD_CVM.astype(np.int32)  # max < 600_000
    df.VL_CONTA = df.VL_CONTA.astype(float)

    # df.MOEDA.value_counts()
    # REAL    43391302
    df.drop(columns=['MOEDA'], inplace=True)
    
    # df.ESCALA_MOEDA.value_counts()
    # MIL        40483230
    # UNIDADE     2908072
    df.ESCALA_MOEDA = df.ESCALA_MOEDA.map({'MIL': 1000, 'UNIDADE': 1})

    # unit base currency
    df.VL_CONTA = df.VL_CONTA * df.ESCALA_MOEDA
    df.drop(columns=['ESCALA_MOEDA'], inplace=True)

    # df.ST_CONTA_FIXA.unique() -> ['S', 'N']
    df.ST_CONTA_FIXA = df.ST_CONTA_FIXA.map({'S': True, 'N': False})

    # df.ORDEM_EXERC.unique() -> ['PENÚLTIMO', 'ÚLTIMO']
    df.ORDEM_EXERC = df.ORDEM_EXERC.map({'ÚLTIMO': 0, 'PENÚLTIMO': -1})
    df.ORDEM_EXERC = df.ORDEM_EXERC.astype(np.int8)

    # BPA, BPP and DFC files have no DT_INI_EXERC column
    if 'DT_INI_EXERC' in df.columns:
        df.DT_INI_EXERC = pd.to_datetime(df.DT_INI_EXERC)
    else:
        # column_order.remove('DT_INI_EXERC')
        df['DT_INI_EXERC'] = pd.NaT
    if 'COLUNA_DF' not in df.columns: df['COLUNA_DF'] = np.nan

    column_order = [
        'CD_CVM', 'CNPJ_CIA', 'DENOM_CIA', 'GRUPO_DFP', 'VERSAO', 'DT_REFER',
        'DT_INI_EXERC', 'DT_FIM_EXERC', 'ORDEM_EXERC', 'CD_CONTA', 'DS_CONTA',
        'ST_CONTA_FIXA', 'COLUNA_DF', 'VL_CONTA']

    df = df[column_order]
    return df 

In [7]:
date_columns = ['DT_REFER', 'DT_FIM_EXERC']
kwargs = {
    'sep': ';',
    'encoding': 'iso-8859-1',
    'dtype': str,
    'parse_dates': date_columns}

In [8]:
# full scan = 8m 10s -> 5.6 GB df memory space
# formated colums = 4m 30s -> 3.5 GB df memory space
# zstd -> 16.5 seg
# lz4 -> 15.4 seg
# df_all = pd.DataFrame()
for parent_file_name in files_names[:]:
    print(parent_file_name)
    file_path = f'./data/raw/{parent_file_name}'
    parent_file = zf.ZipFile(file_path)
    # print(parent_file.namelist())
    df_year = pd.DataFrame()
    child_file_names = parent_file.namelist()
    for child_file_name in child_file_names[1:]:
        # print(child_parent_file_name)
        child_file = parent_file.open(child_file_name)
        df_child = pd.read_csv(child_file, **kwargs)
        df_child = df_format(df_child)        
        # df_all = pd.concat([df_all, df_child], ignore_index=True)
        df_year = pd.concat([df_year, df_child], ignore_index=True)
    sort_by = [
        'CD_CVM', 'GRUPO_DFP', 'VERSAO', 'ORDEM_EXERC', 'DT_REFER', 'CD_CONTA']
    df_year.sort_values(by=sort_by, ignore_index=True, inplace=True)
    df_year.to_parquet(
        f'./data/processed/{parent_file_name[:-4]}.parquet',
        index=False,
        compression='zstd')

dfp_cia_aberta_2010.zip
dfp_cia_aberta_2011.zip
dfp_cia_aberta_2012.zip
dfp_cia_aberta_2013.zip
dfp_cia_aberta_2014.zip
dfp_cia_aberta_2015.zip
dfp_cia_aberta_2016.zip
dfp_cia_aberta_2017.zip
dfp_cia_aberta_2018.zip
dfp_cia_aberta_2019.zip
dfp_cia_aberta_2020.zip
dfp_cia_aberta_2021.zip
itr_cia_aberta_2020.zip
itr_cia_aberta_2021.zip
itr_cia_aberta_2022.zip


In [17]:
pd.read_parquet('./data/processed/dfp_cia_aberta_2020.parquet')

Unnamed: 0,CD_CVM,CNPJ_CIA,DENOM_CIA,GRUPO_DFP,VERSAO,DT_REFER,DT_INI_EXERC,DT_FIM_EXERC,ORDEM_EXERC,CD_CONTA,DS_CONTA,ST_CONTA_FIXA,COLUNA_DF,VL_CONTA
0,94,92.693.019/0001-89,PANATLANTICA S.A.,DF Consolidado -...,1,2020-12-31,NaT,2019-12-31,-1,1,Ativo Total,True,,955559000.0
1,94,92.693.019/0001-89,PANATLANTICA S.A.,DF Consolidado -...,1,2020-12-31,NaT,2019-12-31,-1,1.01,Ativo Circulante,True,,707312000.0
2,94,92.693.019/0001-89,PANATLANTICA S.A.,DF Consolidado -...,1,2020-12-31,NaT,2019-12-31,-1,1.01.01,Caixa e Equivale...,True,,22473000.0
3,94,92.693.019/0001-89,PANATLANTICA S.A.,DF Consolidado -...,1,2020-12-31,NaT,2019-12-31,-1,1.01.02,Aplicações Finan...,True,,294151000.0
4,94,92.693.019/0001-89,PANATLANTICA S.A.,DF Consolidado -...,1,2020-12-31,NaT,2019-12-31,-1,1.01.02.01,Aplicações Finan...,True,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1179381,80209,24.410.913/0001-44,NU HOLDINGS LTD.,DF Consolidado -...,1,2020-12-31,2020-01-01,2020-12-31,0,3.99,Lucro por Ação (...,False,,-1346.0
1179382,80209,24.410.913/0001-44,NU HOLDINGS LTD.,DF Consolidado -...,1,2020-12-31,2020-01-01,2020-12-31,0,3.99.01,Lucro Básico por...,False,,-673.0
1179383,80209,24.410.913/0001-44,NU HOLDINGS LTD.,DF Consolidado -...,1,2020-12-31,2020-01-01,2020-12-31,0,3.99.01.01,ON,False,,-673.0
1179384,80209,24.410.913/0001-44,NU HOLDINGS LTD.,DF Consolidado -...,1,2020-12-31,2020-01-01,2020-12-31,0,3.99.02,Lucro Diluído po...,False,,-673.0


In [5]:
df = pd.read_parquet('./data/processed/dfp_cia_aberta_2020.parquet', filters=[('CD_CVM','==',9512)])
df

Unnamed: 0,CD_CVM,CNPJ_CIA,DENOM_CIA,GRUPO_DFP,VERSAO,DT_REFER,DT_INI_EXERC,DT_FIM_EXERC,ORDEM_EXERC,CD_CONTA,DS_CONTA,ST_CONTA_FIXA,COLUNA_DF,VL_CONTA
0,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,NaT,2019-12-31,-1,1,Ativo Total,True,,9.260110e+11
1,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,NaT,2019-12-31,-1,1.01,Ativo Circulante,True,,1.121010e+11
2,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,NaT,2019-12-31,-1,1.01.01,Caixa e Equivale...,True,,2.971400e+10
3,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,NaT,2019-12-31,-1,1.01.02,Aplicações Finan...,True,,3.580000e+09
4,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,NaT,2019-12-31,-1,1.01.02.01,Aplicações Finan...,True,,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2183,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,2,2020-12-31,2020-01-01,2020-12-31,0,3.99.01.01,ON,False,,5.400000e+02
2184,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,2,2020-12-31,2020-01-01,2020-12-31,0,3.99.01.02,PN,False,,5.400000e+02
2185,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,2,2020-12-31,2020-01-01,2020-12-31,0,3.99.02,Lucro Diluído po...,False,,0.000000e+00
2186,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,2,2020-12-31,2020-01-01,2020-12-31,0,3.99.02.01,ON,False,,5.400000e+02


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2188 entries, 0 to 2187
Data columns (total 14 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   CD_CVM         2188 non-null   int32         
 1   CNPJ_CIA       2188 non-null   object        
 2   DENOM_CIA      2188 non-null   object        
 3   GRUPO_DFP      2188 non-null   object        
 4   VERSAO         2188 non-null   int8          
 5   DT_REFER       2188 non-null   datetime64[ns]
 6   DT_INI_EXERC   1488 non-null   datetime64[ns]
 7   DT_FIM_EXERC   2188 non-null   datetime64[ns]
 8   ORDEM_EXERC    2188 non-null   int8          
 9   CD_CONTA       2188 non-null   object        
 10  DS_CONTA       2188 non-null   object        
 11  ST_CONTA_FIXA  2188 non-null   bool          
 12  COLUNA_DF      832 non-null    object        
 13  VL_CONTA       2188 non-null   float64       
dtypes: bool(1), datetime64[ns](3), float64(1), int32(1), int8(2), object(6)


In [4]:
import pyarrow.parquet as pq
table = pq.read_table(
    "./data/processed/",
    use_legacy_dataset=False,
    filters=[('CD_CVM','==',9512)])
df = table.to_pandas()
df

Unnamed: 0,CD_CVM,CNPJ_CIA,DENOM_CIA,GRUPO_DFP,VERSAO,DT_REFER,DT_INI_EXERC,DT_FIM_EXERC,ORDEM_EXERC,CD_CONTA,DS_CONTA,ST_CONTA_FIXA,COLUNA_DF,VL_CONTA
0,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,3,2010-12-31,NaT,2009-12-31,-1,1,Ativo Total,True,,3.504189e+11
1,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,3,2010-12-31,NaT,2009-12-31,-1,1.01,Ativo Circulante,True,,7.437357e+10
2,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,3,2010-12-31,NaT,2009-12-31,-1,1.01.01,Caixa e Equivale...,True,,2.903423e+10
3,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,3,2010-12-31,NaT,2009-12-31,-1,1.01.01.01,Caixa e Bancos,False,,2.853964e+09
4,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,3,2010-12-31,NaT,2009-12-31,-1,1.01.01.02,Aplicações Finan...,False,,2.618026e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37109,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,1,2021-09-30,2021-07-01,2021-09-30,0,3.99.02,Lucro Diluído po...,False,,0.000000e+00
37110,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,1,2021-09-30,2021-01-01,2021-09-30,0,3.99.02.01,ON,False,,5.760000e+03
37111,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,1,2021-09-30,2021-07-01,2021-09-30,0,3.99.02.01,ON,False,,2.390000e+03
37112,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,1,2021-09-30,2021-01-01,2021-09-30,0,3.99.02.02,PN,False,,5.760000e+03


In [7]:
import pyarrow.parquet as pq
dataset = pq.ParquetDataset("./data/processed/", filters=[('CD_CVM','==',9512)])
dataset.read().to_pandas()

Unnamed: 0,CD_CVM,CNPJ_CIA,DENOM_CIA,GRUPO_DFP,VERSAO,DT_REFER,DT_INI_EXERC,DT_FIM_EXERC,ORDEM_EXERC,CD_CONTA,DS_CONTA,ST_CONTA_FIXA,COLUNA_DF,VL_CONTA
0,94,92.693.019/0001-89,PANATLANTICA S.A.,DF Consolidado -...,2,2010-12-31,NaT,2009-12-31,-1,1,Ativo Total,True,,211293000.0
1,94,92.693.019/0001-89,PANATLANTICA S.A.,DF Consolidado -...,2,2010-12-31,NaT,2009-12-31,-1,1.01,Ativo Circulante,True,,164307000.0
2,94,92.693.019/0001-89,PANATLANTICA S.A.,DF Consolidado -...,2,2010-12-31,NaT,2009-12-31,-1,1.01.01,Caixa e Equivale...,True,,2055000.0
3,94,92.693.019/0001-89,PANATLANTICA S.A.,DF Consolidado -...,2,2010-12-31,NaT,2009-12-31,-1,1.01.02,Aplicações Finan...,True,,44510000.0
4,94,92.693.019/0001-89,PANATLANTICA S.A.,DF Consolidado -...,2,2010-12-31,NaT,2009-12-31,-1,1.01.02.01,Aplicações Finan...,True,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17608706,25810,31.701.408/0001-14,ATHENA SAÚDE BRA...,DF Individual - ...,1,2022-03-31,2022-01-01,2022-03-31,0,3.99,Lucro por Ação -...,False,,0.0
17608707,25810,31.701.408/0001-14,ATHENA SAÚDE BRA...,DF Individual - ...,1,2022-03-31,2022-01-01,2022-03-31,0,3.99.01,Lucro Básico por...,False,,0.0
17608708,25810,31.701.408/0001-14,ATHENA SAÚDE BRA...,DF Individual - ...,1,2022-03-31,2022-01-01,2022-03-31,0,3.99.01.01,ON,False,,-62.6
17608709,25810,31.701.408/0001-14,ATHENA SAÚDE BRA...,DF Individual - ...,1,2022-03-31,2022-01-01,2022-03-31,0,3.99.02,Lucro Diluído po...,False,,0.0


In [21]:
table = dataset.to_table()
table.to_pandas()

TypeError: to_pandas() got an unexpected keyword argument 'filters'

In [24]:
sql = """
SELECT *
FROM parquet_scan('data/processed/*.parquet')
WHERE CD_CVM = 9512;
"""
df = duckdb.query(sql).to_df()
df

Unnamed: 0,CD_CVM,CNPJ_CIA,DENOM_CIA,GRUPO_DFP,VERSAO,DT_REFER,DT_INI_EXERC,DT_FIM_EXERC,ORDEM_EXERC,CD_CONTA,DS_CONTA,ST_CONTA_FIXA,COLUNA_DF,VL_CONTA
0,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,NaT,2019-12-31,-1,1,Ativo Total,True,,9.260110e+11
1,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,NaT,2019-12-31,-1,1.01,Ativo Circulante,True,,1.121010e+11
2,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,NaT,2019-12-31,-1,1.01.01,Caixa e Equivale...,True,,2.971400e+10
3,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,NaT,2019-12-31,-1,1.01.02,Aplicações Finan...,True,,3.580000e+09
4,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,NaT,2019-12-31,-1,1.01.02.01,Aplicações Finan...,True,,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37109,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,1,2013-12-31,2013-01-01,2013-12-31,0,3.99.01.01,ON,False,,1.790000e+03
37110,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,1,2013-12-31,2013-01-01,2013-12-31,0,3.99.01.02,PN,False,,1.790000e+03
37111,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,1,2013-12-31,2013-01-01,2013-12-31,0,3.99.02,Lucro Diluído po...,False,,0.000000e+00
37112,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,1,2013-12-31,2013-01-01,2013-12-31,0,3.99.02.01,ON,False,,1.790000e+03


In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37114 entries, 0 to 37113
Data columns (total 14 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   CD_CVM         37114 non-null  int32         
 1   CNPJ_CIA       37114 non-null  object        
 2   DENOM_CIA      37114 non-null  object        
 3   GRUPO_DFP      37114 non-null  object        
 4   VERSAO         37114 non-null  int32         
 5   DT_REFER       37114 non-null  datetime64[ns]
 6   DT_INI_EXERC   25026 non-null  datetime64[ns]
 7   DT_FIM_EXERC   37114 non-null  datetime64[ns]
 8   ORDEM_EXERC    37114 non-null  int32         
 9   CD_CONTA       37114 non-null  object        
 10  DS_CONTA       37114 non-null  object        
 11  ST_CONTA_FIXA  37114 non-null  bool          
 12  COLUNA_DF      13916 non-null  object        
 13  VL_CONTA       37114 non-null  float64       
dtypes: bool(1), datetime64[ns](3), float64(1), int32(3), object(6)
memory 

In [19]:
df[df.COLUNA_DF.notnull()]

Unnamed: 0,CD_CVM,CNPJ_CIA,DENOM_CIA,GRUPO_DFP,VERSAO,DT_REFER,DT_INI_EXERC,DT_FIM_EXERC,ORDEM_EXERC,CD_CONTA,DS_CONTA,ST_CONTA_FIXA,COLUNA_DF,VL_CONTA
348,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,2019-01-01,2019-12-31,-1,5.01,Saldos Iniciais,True,Capital Social I...,2.054320e+11
349,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,2019-01-01,2019-12-31,-1,5.01,Saldos Iniciais,True,Reservas de Capi...,2.674000e+09
350,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,2019-01-01,2019-12-31,-1,5.01,Saldos Iniciais,True,Reservas de Lucro,9.514800e+10
351,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,2019-01-01,2019-12-31,-1,5.01,Saldos Iniciais,True,Lucros ou Prejuí...,0.000000e+00
352,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Consolidado -...,2,2020-12-31,2019-01-01,2019-12-31,-1,5.01,Saldos Iniciais,True,Outros Resultado...,-2.602900e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36845,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,1,2013-12-31,2013-01-01,2013-12-31,0,5.07,Saldos Finais,True,Reservas de Capi...,1.048205e+09
36846,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,1,2013-12-31,2013-01-01,2013-12-31,0,5.07,Saldos Finais,True,Reservas de Lucro,1.489243e+11
36847,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,1,2013-12-31,2013-01-01,2013-12-31,0,5.07,Saldos Finais,True,Lucros ou Prejuí...,0.000000e+00
36848,9512,33.000.167/0001-01,PETROLEO BRASILE...,DF Individual - ...,1,2013-12-31,2013-01-01,2013-12-31,0,5.07,Saldos Finais,True,Outros Resultado...,-7.243606e+09


In [2]:
con = duckdb.connect(database='data/db/my-db.duckdb', read_only=False)
con

<duckdb.DuckDBPyConnection at 0x7f98ec197330>

In [3]:
sql = "CREATE TABLE teste_tb AS SELECT * FROM parquet_scan('data/processed/*.parquet');"
con.execute(sql).fetchall()

[(17608711,)]

In [3]:
cursor.execute('SHOW TABLES').fetchall()
cursor.query('SELECT * FROM teste_tb LIMIT 5;').to_df()

RuntimeError: Catalog Error: Table with name teste_tb does not exist!
Did you mean "information_schema.tables"?

In [12]:
# gz -> 46s
# zip -> 11s (33s)
# bz2 -> 2 min
# xz -> 1 min
# zstd -> 5.4s (1.9s)
# lz4 -> 5.6s (1.9)
# c_options = {'compresslevel': 5}
# df.to_pickle('data/teste.zip', compression='zip')
# pd.read_pickle('data/teste.zip')
df.to_parquet(f'data/teste.parquet', compression='zstd')
# pd.read_parquet('data/teste.parquet')

In [31]:
df.GRUPO_DFP.unique()
# df

array(['DF Consolidado - Balanço Patrimonial Ativo',
       'DF Individual - Balanço Patrimonial Ativo',
       'DF Consolidado - Balanço Patrimonial Passivo',
       'DF Individual - Balanço Patrimonial Passivo',
       'DF Consolidado - Demonstração do Fluxo de Caixa (Método Direto)',
       'DF Individual - Demonstração do Fluxo de Caixa (Método Direto)',
       'DF Consolidado - Demonstração do Fluxo de Caixa (Método Indireto)',
       'DF Individual - Demonstração do Fluxo de Caixa (Método Indireto)',
       'DF Consolidado - Demonstração das Mutações do Patrimônio Líquido',
       'DF Individual - Demonstração das Mutações do Patrimônio Líquido',
       'DF Consolidado - Demonstração de Resultado Abrangente',
       'DF Individual - Demonstração de Resultado Abrangente',
       'DF Consolidado - Demonstração do Resultado',
       'DF Individual - Demonstração do Resultado',
       'DF Consolidado - Demonstração de Valor Adicionado',
       'DF Individual - Demonstração de Valor A

In [15]:
df = cvm.load_metadata()
df

Unnamed: 0,DT_REFER,VERSAO,CD_CVM,CATEG_DOC,ID_DOC,DT_RECEB
0,2010-1...,1,1023,DFP,4822,2011-0...
1,2010-1...,2,1023,DFP,4823,2011-0...
2,2010-1...,3,1023,DFP,6945,2011-0...
3,2010-1...,1,14206,DFP,5436,2011-0...
4,2010-1...,2,14206,DFP,6775,2011-0...
...,...,...,...,...,...,...
14176,2021-0...,1,4693,ITR,110135,2021-1...
14177,2021-0...,1,21091,ITR,103186,2021-0...
14178,2021-0...,1,21091,ITR,106768,2021-0...
14179,2021-0...,1,21091,ITR,109262,2021-1...


In [4]:
lista = [1]
lista.append("")
lista

[1, '']