
# I - Configuração do ambiente


## 1.1 - Montagem do drive

In [3]:
from google.colab import drive

In [4]:
drive.mount('/content/drive')

Mounted at /content/drive


## 1.2 - Escolha do ambiente e carregamento
São disponibilizadas duas opções de execução (dependendo da credencial utilizada)
 - Leitura: disponível para analises e usuários que não possuem permissão de escrita;
 - Escrita: disponível para atualização de bases (testes de desenvolvimento ou ambiente de produção).

In [5]:
import os
if os.path.isfile('/content/monitor-rosa-leitura.json'):
    datalake_mode = 'leitura'
    %env SERVICE_ACCOUNT_USER=acesso-leitura@monitor-rosa.iam.gserviceaccount.com
    %env SERVICE_ACCOUNT_JSON=/content/monitor-rosa-leitura.json
elif os.path.isfile('/content/monitor-rosa-escrita.json'):
    datalake_mode = 'escrita'
    %env SERVICE_ACCOUNT_USER=acesso-escrita@monitor-rosa.iam.gserviceaccount.com
    %env SERVICE_ACCOUNT_JSON=/content/monitor-rosa-escrita.json
else:
    assert(os.path.isdir('/content/drive/Shareddrives/monitor-rosa-gold') == True)
    datalake_mode = 'shared_drive'
    %env SERVICE_ACCOUNT_USER=''
    %env SERVICE_ACCOUNT_JSON=''
datalake_mode

env: SERVICE_ACCOUNT_USER=''
env: SERVICE_ACCOUNT_JSON=''


'shared_drive'

In [6]:
!rm -r sus-kpis-analysis
!git clone https://github.com/heber-augusto/sus-kpis-analysis.git

rm: cannot remove 'sus-kpis-analysis': No such file or directory
Cloning into 'sus-kpis-analysis'...
remote: Enumerating objects: 1768, done.[K
remote: Counting objects: 100% (28/28), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 1768 (delta 16), reused 12 (delta 12), pack-reused 1740 (from 3)[K
Receiving objects: 100% (1768/1768), 4.78 MiB | 21.58 MiB/s, done.
Resolving deltas: 100% (816/816), done.


## 1.3 - Instalação de libs Python, inicialização de variáveis de ambiente e configuração/instalação do Spark

In [7]:
!pip install -r /content/sus-kpis-analysis/sia/etls/requirements.txt

%env PYTHONHASHSEED=1234
%env JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
%env SPARK_HOME=/content/spark-3.4.4-bin-hadoop3
%env SPARK_VERSION=3.4.4

!source /content/sus-kpis-analysis/sia/etls/bin/setup_spark_env.sh '/content/'

Collecting findspark (from -r /content/sus-kpis-analysis/sia/etls/requirements.txt (line 1))
  Downloading findspark-2.0.1-py2.py3-none-any.whl.metadata (352 bytes)
Collecting imagehash (from -r /content/sus-kpis-analysis/sia/etls/requirements.txt (line 3))
  Downloading ImageHash-4.3.1-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting delta-spark==2.4.0 (from -r /content/sus-kpis-analysis/sia/etls/requirements.txt (line 4))
  Downloading delta_spark-2.4.0-py3-none-any.whl.metadata (1.9 kB)
Collecting pyspark (from -r /content/sus-kpis-analysis/sia/etls/requirements.txt (line 2))
  Downloading pyspark-3.4.4.tar.gz (311.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.4/311.4 MB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting PyWavelets (from imagehash->-r /content/sus-kpis-analysis/sia/etls/requirements.txt (line 3))
  Downloading pywavelets-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x8

In [8]:
if datalake_mode != 'shared_drive':
    %env XDG_CONFIG_HOME=/content/datalake
    !source /content/sus-kpis-analysis/sia/etls/bin/install-google-drive-ocamlfuse.sh
    !source /content/sus-kpis-analysis/sia/etls/bin/mount_google_drive_v2.sh '/content/datalake' $SERVICE_ACCOUNT_USER '0ABIY-a4qrdY9Uk9PVA' 'monitor-rosa-bronze' $SERVICE_ACCOUNT_JSON '/content'
    !source /content/sus-kpis-analysis/sia/etls/bin/mount_google_drive_v2.sh '/content/datalake' $SERVICE_ACCOUNT_USER '0ALl0owLNr53oUk9PVA' 'monitor-rosa-silver' $SERVICE_ACCOUNT_JSON '/content'
    !source /content/sus-kpis-analysis/sia/etls/bin/mount_google_drive_v2.sh '/content/datalake' $SERVICE_ACCOUNT_USER '0AMHp9pBeLvZiUk9PVA' 'monitor-rosa-gold' $SERVICE_ACCOUNT_JSON '/content'

## 1.4 - Inicializa variáveis de acesso ao delta lake criado no drive




> O caminho do warehouse pode ser alterado em caso de testes de escritas locais.


In [9]:
import os

lake_prefix = "temp-output"

if datalake_mode in ('leitura','shared_drive',):
    warehouse_dir = f"/content/datalake/{lake_prefix}/"

if datalake_mode == 'escrita':
    warehouse_dir = f"/content/datalake/"

spark_path = os.getenv('SPARK_HOME')
spark_path

'/content/spark-3.4.4-bin-hadoop3'

## 1.5 - Inclusão da pasta do repositório no python path

Procedimento permite que funções e classes presentes no repositório sejam utilizadas

In [10]:
import sys
sys.path.append('/content/sus-kpis-analysis')
sys.path

['/content',
 '/env/python',
 '/usr/lib/python310.zip',
 '/usr/lib/python3.10',
 '/usr/lib/python3.10/lib-dynload',
 '',
 '/usr/local/lib/python3.10/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.10/dist-packages/IPython/extensions',
 '/usr/local/lib/python3.10/dist-packages/setuptools/_vendor',
 '/root/.ipython',
 '/content/sus-kpis-analysis']

## 1.6 - Importação de funções utilizadas pelo código

In [11]:
from sia.etls.lib.catalog_loader import DeltaLakeDatabaseFsCreator, load_entire_catalog_fs_v2
from sia.etls.lib.table_utilities import vacuum_tables_from_database, table_exists
from sia.etls.lib.fs_spark_session import create_fs_spark_session
from sia.etls.lib.bronze_files_utilities import get_pending_files_from_bronze
from sia.etls.lib.delta_table_creators import ParquetToDelta

## 1.7 - Cria Sessão Spark conectada ao Delta Lake presente no Drive

In [12]:
spark = create_fs_spark_session(
    warehouse_dir=warehouse_dir,
    spark_path=spark_path
)

## 1.8 - Refresh do catálogo para utilizar consultas

In [13]:
zone_names = ['monitor-rosa-bronze','monitor-rosa-silver','monitor-rosa-gold']

if datalake_mode in ('leitura', 'escrita'):

    zone_paths = [f'/content/datalake/{zone_name}/databases' for zone_name in zone_names]
else:
    zone_paths = [f'/content/drive/Shareddrives/{zone_name}/databases' for zone_name in zone_names]



# Carrega catalogo de banco de dados, na zona bronze
database_filter = None #['cnes_bronze.db',]

table_filter = None #['sia_bronze.ar','sia_bronze.aq', 'ibge_silver.cadastro_municipios', 'ibge_silver.demografia_municipios' ]

for databases_path in zone_paths:
    load_entire_catalog_fs_v2(
        spark_session = spark,
        databases_path = databases_path,
        use_db_folder_path=(datalake_mode == 'escrita'),
        database_filter=database_filter,
        table_filter=table_filter
    )

['sia_bronze.db', 'cnes_bronze.db', 'sih_bronze.db', 'sim_bronze.db']
Banco de dados sia_bronze criado.
listando conteúdos do caminho /content/drive/Shareddrives/monitor-rosa-bronze/databases e database sia_bronze
prefix: /content/drive/Shareddrives/monitor-rosa-bronze/databases/sia_bronze.db/
table_list: ['ar', 'aq', 'pa', 'bi', 'am']
Tabela ar criada
Tabela ar criada com comando CREATE TABLE IF NOT EXISTS sia_bronze.ar USING delta LOCATION '/content/drive/Shareddrives/monitor-rosa-bronze/databases/sia_bronze.db/ar'
Tabela aq criada
Tabela aq criada com comando CREATE TABLE IF NOT EXISTS sia_bronze.aq USING delta LOCATION '/content/drive/Shareddrives/monitor-rosa-bronze/databases/sia_bronze.db/aq'
Tabela pa criada
Tabela pa criada com comando CREATE TABLE IF NOT EXISTS sia_bronze.pa USING delta LOCATION '/content/drive/Shareddrives/monitor-rosa-bronze/databases/sia_bronze.db/pa'
Tabela bi criada
Tabela bi criada com comando CREATE TABLE IF NOT EXISTS sia_bronze.bi USING delta LOCATION

## 1.9 - Cria banco de dados gold


In [22]:
destination_database_name = 'cancer_mama_1'

warehouse_dir_g =  f"/content/datalake/{lake_prefix}/"


db_creator = DeltaLakeDatabaseFsCreator(
    spark_session= spark,
    database_location=warehouse_dir_g,
    database_name=destination_database_name
)
db_creator.create_database(
     use_db_folder_path=(datalake_mode == 'escrita')
)

Banco de dados cancer_mama_1 criado.


# II - Exemplo de como listar bancos e tabelas

In [15]:
databases = spark.sql(f"SHOW DATABASES;")
databases.show()

+-----------+
|  namespace|
+-----------+
|cancer_data|
|cancer_mama|
|cnes_bronze|
|    default|
|ibge_silver|
| sia_bronze|
| sih_bronze|
| sim_bronze|
+-----------+



In [15]:
for row in databases.collect():
    spark.sql(f"SHOW TABLES FROM {row['namespace']};").show(truncate=False)

+-----------+-------------------------+-----------+
|namespace  |tableName                |isTemporary|
+-----------+-------------------------+-----------+
|cancer_data|aq_filtered              |false      |
|cancer_data|ar_filtered              |false      |
|cancer_data|cadastro_municipios      |false      |
|cancer_data|dados_estados_mensal     |false      |
|cancer_data|dados_municipios_mensal  |false      |
|cancer_data|demografia_municipios    |false      |
|cancer_data|pacientes                |false      |
|cancer_data|procedimentos            |false      |
|cancer_data|procedimentos_e_pacientes|false      |
+-----------+-------------------------+-----------+

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
+---------+---------+-----------+

+-----------+---------+-----------+
|namespace  |tableName|isTemporary|
+-----------+---------+-----------+
|cnes_bronze|dc       |false      |
|cnes_bronze|ep       |false      |
|cnes_

# III - Extração e Filtragem:



## 3.1 - Definindo utils e filtros

In [16]:
# Função para gerar consultas SQL
def get_select_all_query(table_name, where_clause=''):
    return f"""
    SELECT
        *
    FROM {table_name}
    {where_clause}
    """

# Função para executar consultas SQL no Spark
def run_sql_query(sql_query):
    return spark.sql(sql_query)

In [17]:
# Definindo variavel destination_database_name

destination_database_name = 'cancer_mama'

# Filtro pelo CID

cid_filter = ['C500', 'C501', 'C502', 'C503', 'C504', 'C505', 'C506', 'C508', 'C509']

cid_filter = f"""({','.join([f"'{cid_id}'" for cid_id in cid_filter])})"""

# Dicionário e filtro de procedimentos
proc_id_dict = {
    '0201010569': 'BIOPSIA/EXERESE DE NÓDULO DE MAMA',
    '0201010585': 'PUNÇÃO ASPIRATIVA DE MAMA POR AGULHA FINA',
    '0201010607': 'PUNÇÃO DE MAMA POR AGULHA GROSSA',
    '0203010035': 'EXAME DE CITOLOGIA (EXCETO CERVICO-VAGINAL E DE MAMA)',
    '0203010043': 'EXAME CITOPATOLOGICO DE MAMA',
    '0203020065': 'EXAME ANATOMOPATOLOGICO DE MAMA - BIOPSIA',
    '0203020073': 'EXAME ANATOMOPATOLOGICO DE MAMA - PECA CIRURGICA',
    '0205020097': 'ULTRASSONOGRAFIA MAMARIA BILATERAL',
    '0208090037': 'CINTILOGRAFIA DE MAMA (BILATERAL)',
    '0204030030': 'MAMOGRAFIA',
    '0204030188': 'MAMOGRAFIA BILATERAL PARA RASTREAMENTO'
}

proc_id_filter = f"""({','.join([f"'{proc_id}'" for proc_id in proc_id_dict.keys()])})"""

# Mostrar os filtros
print(proc_id_filter)
print(cid_filter)

('0201010569','0201010585','0201010607','0203010035','0203010043','0203020065','0203020073','0205020097','0208090037','0204030030','0204030188')
('C500','C501','C502','C503','C504','C505','C506','C508','C509')


## 3.2 - Carrega Tabela de Cadastro de Municípios

In [18]:
query_cadastro_municipios = spark.sql("""

SELECT *
FROM
ibge_silver.cadastro_municipios

""")

## 3.2 - Carrega tabela SIA.AR filtrando dados de câncer de mama e procedimentos de interesse

In [19]:
sql_query_ar = get_select_all_query(
    table_name='sia_bronze.ar',
    where_clause=f"""

        WHERE AP_CIDPRI IN {cid_filter}
        AND AP_MVM = '202405'
        """
)

cancer_ar_filtered = run_sql_query(sql_query_ar)


cancer_ar_filtered.show(3)

+------+---------+---------+---------+-------------+------+----------+--------+--------+--------+---------+---------+--------------+-------------+---------------+----------+----------+-------+----------+---------+----------+---------+--------+--------+---------+--------+---------+---------+---------+--------+---------+---------+-------+---------+---------+----------+---------+-------------+---------+----------+--------+---------+---------+---------+--------+-------+--------+---------+---------+---------+---------+---------+----------+---------+----------+---------+----------+---------+---------+---------+---------+---------+---------+---------+--------+---------+---------+---------+---------+---------+---------+--------+--------+--------------------+----------------------+--------------------+-------------------+---------+
|AP_MVM|AP_CONDIC|AP_GESTAO|AP_CODUNI|   AP_AUTORIZ|AP_CMP| AP_PRIPAL|AP_VL_AP|AP_UFMUN|AP_TPUPS|AP_TIPPRE|AP_MN_IND|    AP_CNPJCPF|   AP_CNPJMNT|      AP_CNSPCN|AP_

In [23]:
cancer_ar_filtered\
      .repartition(1)\
      .write\
      .format("delta")\
      .mode("overwrite")\
      .saveAsTable(f"{destination_database_name}.ar_filtered")

## 3.3 - Carrega tabela SIA.AQ filtrando dados de câncer de mama e procedimentos de interesse

In [None]:
sql_query_aq = get_select_all_query(
    table_name='sia_bronze.aq',
    where_clause=f"""
        WHERE AP_CIDPRI IN {cid_filter}
        AND AP_MVM = '202405'
    """
)

cancer_aq_filtered = run_sql_query(sql_query_aq)

In [None]:
cancer_aq_filtered\
      .repartition(1)\
      .write\
      .format("delta")\
      .mode("overwrite")\
      .saveAsTable(f"{destination_database_name}.aq_filtered")

# IV - Processamento dos Dados dos Pacientes e Procedimentos

## 4.1 - Cria dados consolidados de pacientes e procedimentos (quimio e radioterapia)

Radioterapia

In [None]:
cancer_ar_res = spark.sql(f"""
SELECT
    AP_CMP as data,
    AP_CNSPCN as paciente,
    AR_ESTADI as estadiamento,
    DOUBLE(AP_VL_AP)  as custo,
    INT(AP_OBITO) as obito,
    AP_MUNPCN as municipio
FROM cancer_data.ar_filtered
""")
cancer_ar_res.show(3)

+------+---------------+------------+------+-----+---------+
|  data|       paciente|estadiamento| custo|obito|municipio|
+------+---------------+------------+------+-----+---------+
|202405|{{|}{}}}|           1|5904.0|    0|   355030|
|202405|{{}}}}~~~|           3|5904.0|    0|   352310|
|202404|{|{}||           2|   0.0|    0|   355030|
+------+---------------+------------+------+-----+---------+
only showing top 3 rows



Quimio

In [None]:
cancer_aq_res = spark.sql(f"""
SELECT
    AP_CMP as data,
    AP_CNSPCN as paciente,
    AQ_ESTADI as estadiamento,
    DOUBLE(AP_VL_AP)  as custo,
    INT(AP_OBITO) as obito,
    AP_MUNPCN as municipio
FROM cancer_data.aq_filtered
""")
cancer_aq_res.show(3)

+------+---------------+------------+------+-----+---------+
|  data|       paciente|estadiamento| custo|obito|municipio|
+------+---------------+------------+------+-----+---------+
|202404|{{}{|{|           3|1400.0|    0|   351640|
|202404|{{||~}||           3|1400.0|    0|   355030|
|202405|{}~{|{||||           4|1400.0|    0|   355220|
+------+---------------+------------+------+-----+---------+
only showing top 3 rows



## 4.2 - Unifica os dados de radio e quimio consolidados

In [None]:
df_union = cancer_aq_res.union(cancer_ar_res)

df_union.createOrReplaceTempView("cancer_ordered")

In [None]:
df_union\
  .repartition(1)\
  .write\
  .format("delta")\
  .mode("overwrite")\
  .saveAsTable(f"{destination_database_name}.procedimentos")

## 4.3 - Consolidando os dados por paciente

In [None]:
res_consolidado = spark.sql("""
SELECT
    paciente,
    FIRST(data) as data_primeiro_estadiamento,
    LAST(data) as data_ultimo_estadiamento,
    COUNT(1) as numero_procedimentos,
    FIRST(estadiamento) as primeiro_estadiamento,
    LAST(estadiamento) as ultimo_estadiamento,
    MAX (estadiamento) as maior_estadiamento,
    MIN (estadiamento) as menor_estadiamento,
    SUM(custo) as custo_total,
    MAX(obito) as indicacao_obito,
    FIRST(municipio) as primeiro_municipio,
    LAST(municipio) as ultimo_municipio
FROM (SELECT * FROM cancer_ordered ORDER BY paciente, data)
GROUP BY paciente
""")

res_consolidado\
  .repartition(1)\
  .write\
  .format("delta")\
  .mode("overwrite")\
  .saveAsTable(f"{destination_database_name}.pacientes")

## 4.4 - Procedimentos e Pacientes

In [None]:
procedimentos_e_pacientes = spark.sql(f"""
  SELECT
      c.*,
      p.data_primeiro_estadiamento,
      p.data_ultimo_estadiamento,
      p.primeiro_estadiamento,
      p.maior_estadiamento,
      p.ultimo_estadiamento,
      p.custo_total,
      p.primeiro_municipio,
      p.ultimo_municipio,
      p.indicacao_obito
  FROM cancer_data.procedimentos AS c
  FULL OUTER JOIN cancer_data.pacientes AS p
  ON c.paciente = p.paciente
""")

procedimentos_e_pacientes\
  .repartition(1)\
  .write\
  .format("delta")\
  .mode("overwrite")\
  .saveAsTable(f"{destination_database_name}.procedimentos_e_pacientes")

# V - Agregação por Município e Estado

## 5.1 - Consolida dados por municipio


In [None]:
database_name = "cancer_data"

diagnosticos_por_estadiamento_municipio_df = spark.sql(f"""
    SELECT
        primeiro_estadiamento,
        data_primeiro_estadiamento AS data,
        primeiro_municipio AS municipio,
        COUNT(DISTINCT(paciente)) AS numero_diagnosticos
    FROM {destination_database_name}.pacientes
    WHERE primeiro_estadiamento != ''
    GROUP BY primeiro_estadiamento, data_primeiro_estadiamento, primeiro_municipio
""")



diagnosticos_por_estadiamento_municipio_df.createOrReplaceTempView("diagnosticos_por_estadiamento_municipio")

## 5.2 - Consolida dados mensais por municipio

In [None]:
dados_estad_municipio_mensal_df = spark.sql(f"""
    SELECT
        data,
        municipio,
        primeiro_estadiamento,
        SUM(custo) AS custo_estadiamento,
        COUNT(DISTINCT(paciente)) AS numero_pacientes,
        SUM(DISTINCT(obito)) AS obitos,
        SUM(DISTINCT(indicacao_obito)) AS obito_futuro,
        COUNT(1) AS numero_procedimentos
    FROM
        (SELECT * FROM {destination_database_name}.procedimentos_e_pacientes ORDER BY data)
    GROUP BY data, municipio, primeiro_estadiamento
""")



dados_estad_municipio_mensal_df.createOrReplaceTempView("dados_municipios_mensal")

## 5.3 - Consolida dados por municipio mensal

In [None]:
dados_estad_municipio_mensal = spark.sql("""
    SELECT
        mm.*,
        COALESCE(em.numero_diagnosticos, 0) AS numero_diagnosticos
    FROM dados_municipios_mensal mm
    FULL OUTER JOIN diagnosticos_por_estadiamento_municipio em
    ON mm.data = em.data
    AND mm.municipio = em.municipio
    AND mm.primeiro_estadiamento = em.primeiro_estadiamento
""")



dados_estad_municipio_mensal\
    .repartition(1)\
    .write\
    .format("delta")\
    .mode("overwrite")\
    .saveAsTable(f"{destination_database_name}.dados_estad_municipio_mensal")

## 5.4 - Agregação por estado

In [None]:
dados_estad_mensal = spark.sql(f"""
    SELECT
        estado,
        data,
        primeiro_estadiamento,
        SUM(custo_estadiamento) AS custo_estadiamento,
        SUM(numero_pacientes) AS numero_pacientes,
        COUNT(DISTINCT(municipio)) AS numero_municipios,
        SUM(obitos) AS obitos,
        SUM(obito_futuro) AS obitos_futuros,
        SUM(numero_procedimentos) AS numero_procedimentos,
        SUM(numero_diagnosticos) AS numero_diagnosticos
    FROM (
        SELECT
            cadastro_cidades.nome_uf AS estado,
            mm.*
        FROM cancer_data.dados_municipios_mensal mm
        LEFT JOIN cancer_data.cadastro_municipios AS cadastro_cidades
        ON int(mm.municipio) = int(cadastro_cidades.id / 10)
        ORDER BY data
    ) AS dados_estado
    GROUP BY estado, data, primeiro_estadiamento
""")

# Salvando a tabela agregada por estado
dados_estad_mensal\
    .repartition(1)\
    .write\
    .format("delta")\
    .mode("overwrite")\
    .saveAsTable(f"{destination_database_name}.dados_estados_mensal")

AnalysisException: [TABLE_OR_VIEW_NOT_FOUND] The table or view `cancer_data`.`dados_municipios_mensal` cannot be found. Verify the spelling and correctness of the schema and catalog.
If you did not qualify the name with a schema, verify the current_schema() output, or qualify the name with the correct schema and catalog.
To tolerate the error on drop use DROP VIEW IF EXISTS or DROP TABLE IF EXISTS.; line 17 pos 13;
'Aggregate ['estado, 'data, 'primeiro_estadiamento], ['estado, 'data, 'primeiro_estadiamento, 'SUM('custo_estadiamento) AS custo_estadiamento#9639, 'SUM('numero_pacientes) AS numero_pacientes#9640, 'COUNT(distinct 'municipio) AS numero_municipios#9641, 'SUM('obitos) AS obitos#9642, 'SUM('obito_futuro) AS obitos_futuros#9643, 'SUM('numero_procedimentos) AS numero_procedimentos#9644, 'SUM('numero_diagnosticos) AS numero_diagnosticos#9645]
+- 'SubqueryAlias dados_estado
   +- 'Sort ['data ASC NULLS FIRST], true
      +- 'Project ['cadastro_cidades.nome_uf AS estado#9638, mm.*]
         +- 'Join LeftOuter, ('int('mm.municipio) = 'int(('cadastro_cidades.id / 10)))
            :- 'SubqueryAlias mm
            :  +- 'UnresolvedRelation [cancer_data, dados_municipios_mensal], [], false
            +- SubqueryAlias cadastro_cidades
               +- SubqueryAlias spark_catalog.cancer_data.cadastro_municipios
                  +- Relation spark_catalog.cancer_data.cadastro_municipios[id#9646,nome#9647,id_uf#9648,nome_uf#9649] parquet


# VI - Limpeza das tabelas delta, considerando 24 horas de retenção


In [None]:
vacuum_tables_from_database(
        spark_session = spark,
        database_name = database_name,
        retention_hours = 24
    )