# TABELAS BRONZE

VAMOS LER OS ARQUIVOS CSV ORIGINAIS DO GELATO E TRANSFORMÁ-LOS EM DADOS ANALÍTICOS

O arquivo visitas1.csv foi lido em Python devido a algumas dificuldades de leitura no SQL.

In [0]:
# Step 1: read the CSV with a parser set up for quoted, multi-line fields.
# inferSchema is off, so every column is loaded as a string.
# NOTE(review): unlike most other cells in this notebook, this path has no
# "MVP-engenharia_de_dados" segment — confirm the file location.
df = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", "false")
    .option("multiLine", "true")
    .option("quote", '"')
    .option("escape", '"')
    .load("/Workspace/Users/alabrao@hotmail.com/dados_originais/visitas1.csv")
)

# Step 2: map every source header to its snake_case, ASCII-only bronze name.
# The keys for "observacao" and "tag_servico" are written as mis-decoded
# (mojibake) text. NOTE(review): a later query in this notebook still selects
# `Observação` from bronze_visitas, so confirm which spelling the CSV header
# really has.
visitas_renames = {
    "ambiente/servicoTag": "ambiente_servico_tag",
    "dataVisita": "data_visita",
    "escritorio": "escritorio",
    "fotos": "fotos",
    "horaVisita": "hora_visita",
    "Nome": "nome",
    "ObservaÃ§Ã£o": "observacao",
    "projetoVisita": "projeto_visita",
    "statusVisita": "status_visita",
    "TagAmbiente": "tag_ambiente",
    "Tags": "tags",
    "TagServiÃ§o": "tag_servico",
    "userEquipe": "user_equipe",
    "Creation Date": "creation_date",
    "Modified Date": "modified_date",
    "Slug": "slug",
    "Creator": "creator",
    "unique id": "unique_id",
}

# withColumnRenamed does nothing when the source column is absent, so report
# the headers that were not found instead of letting the rename be skipped
# unnoticed. The comparison ignores case — this assumes Spark's default
# case-insensitive column resolution.
present = {name.lower() for name in df.columns}
not_found = [src for src in visitas_renames if src.lower() not in present]
if not_found:
    print(f"WARNING: visitas1.csv headers not found, rename skipped: {not_found}")

for src, dst in visitas_renames.items():
    df = df.withColumnRenamed(src, dst)

# Step 3: drop the previous bronze table so the new schema can replace it.
spark.sql("DROP TABLE IF EXISTS bronze_visitas")

# Step 4: write the renamed data as the bronze Delta table.
df.write.format("delta") \
    .mode("overwrite") \
    .saveAsTable("bronze_visitas")


In [0]:
%sql
-- List the tables in the current schema.
SHOW TABLES

In [0]:
%sql
-- Bronze table for projetos.csv.
-- The three columns whose headers contain a space are aliased to snake_case and
-- placed first; every other column is passed through unchanged by `* EXCEPT`.
-- NOTE(review): unlike the users/fotos cells, this path has no
-- "MVP-engenharia_de_dados" segment — confirm the file location.
CREATE OR REPLACE TABLE bronze_projetos AS
SELECT
  `Creation Date` AS creation_date,
  `Modified Date` AS modified_date,
  `unique id`     AS unique_id,
  * EXCEPT (`Creation Date`, `Modified Date`, `unique id`)
FROM read_files(
  "/Workspace/Users/alabrao@hotmail.com/dados_originais/projetos.csv",
  format      => "csv",
  header      => true,
  inferSchema => true
);


In [0]:
%sql SHOW TABLES
-- Lists the tables in the current schema.

In [0]:
%sql
-- Bronze table for users.csv.
-- The three columns whose headers contain a space are aliased to snake_case and
-- placed first; every other column is passed through unchanged by `* EXCEPT`.
CREATE OR REPLACE TABLE bronze_users AS
SELECT
  `Creation Date` AS creation_date,
  `Modified Date` AS modified_date,
  `unique id`     AS unique_id,
  * EXCEPT (`Creation Date`, `Modified Date`, `unique id`)
FROM read_files(
  "/Workspace/Users/alabrao@hotmail.com/MVP-engenharia_de_dados/dados_originais/users.csv",
  format      => "csv",
  header      => true,
  inferSchema => true
);

In [0]:
%sql
-- Bronze table for fotos.csv.
-- `Creation Date` is aliased to creation_date and placed first; the remaining
-- columns are passed through unchanged by `* EXCEPT`.
CREATE OR REPLACE TABLE bronze_fotos AS
SELECT
  `Creation Date` AS creation_date,
  * EXCEPT (`Creation Date`)
FROM read_files(
  "/Workspace/Users/alabrao@hotmail.com/MVP-engenharia_de_dados/dados_originais/fotos.csv",
  format      => "csv",
  header      => true,
  inferSchema => true
);

In [0]:
%sql
-- Bronze table for pendencias1.csv, with every column listed explicitly and
-- aliased to snake_case.
-- Only rows whose _rescued_data is NULL are kept, so any row with rescued
-- content is left out of the table.
-- NOTE(review): a later Python cell writes bronze_pendencias again from a
-- different path — confirm which load is the intended one.
CREATE OR REPLACE TABLE bronze_pendencias AS
SELECT
  `Envolver cliente` AS envolver_cliente,
  projetoPendencias  AS projeto_pendencias,
  Resolvida          AS resolvida,
  Titulo             AS titulo,
  Urgente            AS urgente,
  visitaPendencias   AS visita_pendencias,
  `Creation Date`    AS creation_date,
  `Modified Date`    AS modified_date,
  Slug               AS slug,
  Creator            AS creator,
  `unique id`        AS unique_id
FROM read_files(
  "dbfs:/Workspace/Users/alabrao@hotmail.com/MVP-engenharia_de_dados/dados_originais/pendencias1.csv",
  format      => "csv",
  header      => true,
  inferSchema => true
)
WHERE _rescued_data IS NULL;


O arquivo pendencias1.csv teve que ser lido em Python por dificuldades no SQL com as colunas.

In [0]:
# Step 1: read the CSV with a parser set up for quoted, multi-line fields.
# inferSchema is off, so every column is loaded as a string.
# NOTE(review): the SQL cell that also builds bronze_pendencias reads from a
# path containing "MVP-engenharia_de_dados"; this one does not — confirm which
# location is correct.
df = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", "false")
    .option("multiLine", "true")
    .option("quote", '"')
    .option("escape", '"')
    .load("dbfs:/Workspace/Users/alabrao@hotmail.com/dados_originais/pendencias1.csv")
)

# Step 2: map every source header to its snake_case bronze name.
pendencias_renames = {
    "Envolver cliente": "envolver_cliente",
    "projetoPendencias": "projeto_pendencias",
    "Resolvida": "resolvida",
    "Titulo": "titulo",
    "Urgente": "urgente",
    "visitaPendencias": "visita_pendencias",
    "Creation Date": "creation_date",
    "Modified Date": "modified_date",
    "Slug": "slug",
    "Creator": "creator",
    "unique id": "unique_id",
}

# withColumnRenamed does nothing when the source column is absent, so report
# the headers that were not found instead of letting the rename be skipped
# unnoticed. The comparison ignores case — this assumes Spark's default
# case-insensitive column resolution.
present = {name.lower() for name in df.columns}
not_found = [src for src in pendencias_renames if src.lower() not in present]
if not_found:
    print(f"WARNING: pendencias1.csv headers not found, rename skipped: {not_found}")

for src, dst in pendencias_renames.items():
    df = df.withColumnRenamed(src, dst)

# Step 3: remove the _rescued_data helper column when the reader produced one.
if "_rescued_data" in df.columns:
    df = df.drop("_rescued_data")

# Step 4: write the bronze Delta table.
# bronze_pendencias may already exist with the schema inferred by the SQL cell
# of this notebook, while this DataFrame is all strings. overwriteSchema lets
# the overwrite replace that schema instead of failing schema enforcement.
(
    df.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("bronze_pendencias")
)


In [0]:
%sql
-- Bronze table for escritorios.csv.
-- `Creation Date` is aliased to creation_date and placed first; the remaining
-- columns are passed through unchanged by `* EXCEPT`.
CREATE OR REPLACE TABLE bronze_escritorios AS
SELECT
  `Creation Date` AS creation_date,
  * EXCEPT (`Creation Date`)
FROM read_files(
  "/Workspace/Users/alabrao@hotmail.com/MVP-engenharia_de_dados/dados_originais/escritorios.csv",
  format      => "csv",
  header      => true,
  inferSchema => true
);

In [0]:
%sql
-- Bronze table for ambientes.csv.
-- `Creation Date` is aliased to creation_date and placed first; the remaining
-- columns are passed through unchanged by `* EXCEPT`.
CREATE OR REPLACE TABLE bronze_ambientes AS
SELECT
  `Creation Date` AS creation_date,
  * EXCEPT (`Creation Date`)
FROM read_files(
  "/Workspace/Users/alabrao@hotmail.com/MVP-engenharia_de_dados/dados_originais/ambientes.csv",
  format      => "csv",
  header      => true,
  inferSchema => true
);

In [0]:
%sql
-- Bronze table for ambiente-servicos.csv.
-- The two date columns whose headers contain a space are aliased to snake_case
-- and placed first; the remaining columns are passed through by `* EXCEPT`.
CREATE OR REPLACE TABLE bronze_ambiente_servicos AS
SELECT
  `Creation Date` AS creation_date,
  `Modified Date` AS modified_date,
  * EXCEPT (`Creation Date`, `Modified Date`)
FROM read_files(
  "/Workspace/Users/alabrao@hotmail.com/MVP-engenharia_de_dados/dados_originais/ambiente-servicos.csv",
  format      => "csv",
  header      => true,
  inferSchema => true
);

In [0]:
%sql
-- Bronze table for equipes.csv.
-- `Creation Date` is aliased to creation_date and placed first; the remaining
-- columns are passed through unchanged by `* EXCEPT`.
CREATE OR REPLACE TABLE bronze_equipes AS
SELECT
  `Creation Date` AS creation_date,
  * EXCEPT (`Creation Date`)
FROM read_files(
  "/Workspace/Users/alabrao@hotmail.com/MVP-engenharia_de_dados/dados_originais/equipes.csv",
  format      => "csv",
  header      => true,
  inferSchema => true
);

In [0]:
%sql
-- Bronze table for assinaturas.csv.
-- `Creation Date` is aliased to creation_date and placed first; the remaining
-- columns are passed through unchanged by `* EXCEPT`.
CREATE OR REPLACE TABLE bronze_assinaturas AS
SELECT
  `Creation Date` AS creation_date,
  * EXCEPT (`Creation Date`)
FROM read_files(
  "/Workspace/Users/alabrao@hotmail.com/MVP-engenharia_de_dados/dados_originais/assinaturas.csv",
  format      => "csv",
  header      => true,
  inferSchema => true
);

In [0]:
%sql
-- List the tables in the current schema.
SHOW TABLES

In [0]:
%sql
-- Show the column names and data types of bronze_users.
DESCRIBE TABLE bronze_users;

In [0]:
%sql
-- Show the column names and data types of bronze_projetos.
DESCRIBE TABLE bronze_projetos;

In [0]:
%sql
-- Show the column names and data types of bronze_visitas.
DESCRIBE TABLE bronze_visitas;

**Na leitura dos arquivos CSV, o inferSchema não foi de grande valia, pois a maioria dos campos acabou sendo convertida para string.**

In [0]:
%sql
-- List the tables in the workspace.default schema.
SHOW TABLES IN workspace.default;

In [0]:
%sql
-- Preview up to 20 rows of bronze_visitas.
SELECT *
FROM bronze_visitas
LIMIT 20;

In [0]:
%sql
-- Show the column names and data types of bronze_visitas.
DESCRIBE TABLE bronze_visitas;

In [0]:
%sql
-- Preview the name and observation columns for up to 20 bronze_visitas rows.
-- NOTE(review): the cell that loads bronze_visitas renames the observation
-- column to `observacao`; if that rename matched the CSV header, the column
-- `Observação` no longer exists and this query fails — confirm the column name.
SELECT nome, `Observação` FROM bronze_visitas LIMIT 20;

In [0]:
%sql
-- Preview up to 10 rows of bronze_visitas.
SELECT *
FROM bronze_visitas
LIMIT 10;