In [1]:
import duckdb
import pandas as pd
import os
from datetime import datetime

In [2]:
# Open the DuckDB database file (creating it if it does not exist yet) in
# read-write mode so the cells below can create tables and insert rows.

con = duckdb.connect(
    database='dados_duckdb.db',
    read_only=False,
)

In [3]:
# Load z0019_1.csv from the landing folder and add the two lineage columns
# (nome_arquivo and data_ingestao) that the bronze table expects.

arquivo = 'z0019_1.csv'
data_ingestao = datetime.now()

# dtype=str keeps every source field exactly as it is written in the file.
# The bronze table stores these fields as VARCHAR; letting pandas infer types
# would drop leading zeros and turn an integer column that contains blanks
# into floats (e.g. "100" -> 100.0) before the values are inserted.
df = pd.read_csv(f'../landing/{arquivo}', sep=';', dtype=str)
df['nome_arquivo'] = arquivo
df['data_ingestao'] = data_ingestao
df.head()

Unnamed: 0,NATBR,MAKTX,WERKS,MAINS,LABST,nome_arquivo,data_ingestao
0,10001,PARAFUSO,BT10,100,100,z0019_1.csv,2025-11-19 22:14:28.898572
1,10002,MARTELO,BT50,100,1500,z0019_1.csv,2025-11-19 22:14:28.898572
2,10003,PREGO,BT10,100,50,z0019_1.csv,2025-11-19 22:14:28.898572


In [4]:
# Create the bronze_z0019 table if it is not there yet: the five source fields
# are kept as VARCHAR, followed by the source file name and the ingestion
# timestamp.

ddl_bronze_z0019 = """
    CREATE TABLE IF NOT EXISTS bronze_z0019 (
        NATBR VARCHAR,
        MAKTX VARCHAR,
        WERKS VARCHAR,
        MAINS VARCHAR,
        LABST VARCHAR,
        nome_arquivo VARCHAR,
        data_ingestao TIMESTAMP
    )
"""
con.execute(ddl_bronze_z0019)

<_duckdb.DuckDBPyConnection at 0x1b557abbb30>

In [5]:
# Query the table right after creating it. On the first run against a new
# database file it has no rows yet, so only the column headers are displayed.

consulta_bronze = "SELECT * FROM bronze_z0019"
resultado = con.execute(consulta_bronze).fetchdf()
resultado.head()

Unnamed: 0,NATBR,MAKTX,WERKS,MAINS,LABST,nome_arquivo,data_ingestao


In [6]:
# Ingest the rows of df into bronze_z0019.
#
# - Columns are listed by name on both sides, so a CSV whose columns arrive in
#   a different order cannot silently land values in the wrong table column
#   (a positional "SELECT *" would, because every source field is VARCHAR).
# - The NOT EXISTS guard skips the insert when rows with the same nome_arquivo
#   are already in the table. The database file persists between runs, so
#   without it every re-run of the notebook would append the same file again.
#   NOTE(review): the guard is keyed on the file name only; if a file can be
#   re-delivered under the same name with new content, revisit this rule.

con.execute("""
    INSERT INTO bronze_z0019
        (NATBR, MAKTX, WERKS, MAINS, LABST, nome_arquivo, data_ingestao)
    SELECT
        novo.NATBR, novo.MAKTX, novo.WERKS, novo.MAINS, novo.LABST,
        novo.nome_arquivo, novo.data_ingestao
    FROM df AS novo
    WHERE NOT EXISTS (
        SELECT 1
        FROM bronze_z0019 AS carregado
        WHERE carregado.nome_arquivo = novo.nome_arquivo
    )
""")

<_duckdb.DuckDBPyConnection at 0x1b557abbb30>

In [7]:
# Read the table back after the insert to check the rows from the first file.

consulta_bronze = "SELECT * FROM bronze_z0019"
resultado2 = con.execute(consulta_bronze).fetchdf()
resultado2.head()

Unnamed: 0,NATBR,MAKTX,WERKS,MAINS,LABST,nome_arquivo,data_ingestao
0,10001,PARAFUSO,BT10,100,100,z0019_1.csv,2025-11-19 22:14:28.898572
1,10002,MARTELO,BT50,100,1500,z0019_1.csv,2025-11-19 22:14:28.898572
2,10003,PREGO,BT10,100,50,z0019_1.csv,2025-11-19 22:14:28.898572


✅ Inserindo os dados do arquivo z0019_2.csv na tabela bronze_z0019

In [8]:
# Load z0019_2.csv from the landing folder and add the two lineage columns
# (nome_arquivo and data_ingestao) that the bronze table expects.

arquivo2 = 'z0019_2.csv'
data_ingestao2 = datetime.now()

# dtype=str keeps every source field exactly as it is written in the file.
# The bronze table stores these fields as VARCHAR; letting pandas infer types
# would drop leading zeros and turn an integer column that contains blanks
# into floats (e.g. "100" -> 100.0) before the values are inserted.
df2 = pd.read_csv(f'../landing/{arquivo2}', sep=';', dtype=str)
df2['nome_arquivo'] = arquivo2
df2['data_ingestao'] = data_ingestao2
df2.head()

Unnamed: 0,NATBR,MAKTX,WERKS,MAINS,LABST,nome_arquivo,data_ingestao
0,10004,SERRA,BT50,100,200,z0019_2.csv,2025-11-19 22:15:03.155236
1,10005,MACHADO,BT50,100,100,z0019_2.csv,2025-11-19 22:15:03.155236
2,10003,PREGO,BT10,100,60,z0019_2.csv,2025-11-19 22:15:03.155236


In [9]:
# Ingest the rows of df2 into bronze_z0019.
#
# - Columns are listed by name on both sides, so a CSV whose columns arrive in
#   a different order cannot silently land values in the wrong table column
#   (a positional "SELECT *" would, because every source field is VARCHAR).
# - The NOT EXISTS guard skips the insert when rows with the same nome_arquivo
#   are already in the table. The database file persists between runs, so
#   without it every re-run of the notebook would append the same file again.
#   NOTE(review): the guard is keyed on the file name only; if a file can be
#   re-delivered under the same name with new content, revisit this rule.

con.execute("""
    INSERT INTO bronze_z0019
        (NATBR, MAKTX, WERKS, MAINS, LABST, nome_arquivo, data_ingestao)
    SELECT
        novo.NATBR, novo.MAKTX, novo.WERKS, novo.MAINS, novo.LABST,
        novo.nome_arquivo, novo.data_ingestao
    FROM df2 AS novo
    WHERE NOT EXISTS (
        SELECT 1
        FROM bronze_z0019 AS carregado
        WHERE carregado.nome_arquivo = novo.nome_arquivo
    )
""")

<_duckdb.DuckDBPyConnection at 0x1b557abbb30>

In [10]:
# Read the table back once more and show up to ten rows, enough to see the
# rows from both ingested files side by side.
consulta_bronze = "SELECT * FROM bronze_z0019"
resultado3 = con.execute(consulta_bronze).fetchdf()
resultado3.head(10)

Unnamed: 0,NATBR,MAKTX,WERKS,MAINS,LABST,nome_arquivo,data_ingestao
0,10001,PARAFUSO,BT10,100,100,z0019_1.csv,2025-11-19 22:14:28.898572
1,10002,MARTELO,BT50,100,1500,z0019_1.csv,2025-11-19 22:14:28.898572
2,10003,PREGO,BT10,100,50,z0019_1.csv,2025-11-19 22:14:28.898572
3,10004,SERRA,BT50,100,200,z0019_2.csv,2025-11-19 22:15:03.155236
4,10005,MACHADO,BT50,100,100,z0019_2.csv,2025-11-19 22:15:03.155236
5,10003,PREGO,BT10,100,60,z0019_2.csv,2025-11-19 22:15:03.155236


In [11]:
# Close the database connection now that the ingestion steps are done.

con.close()