**Criando a sessão Spark no Dataproc.**

In [None]:
from google.cloud.dataproc_spark_connect import DataprocSparkSession
from google.cloud.dataproc_v1 import Session
from pyspark.sql import functions as F

# Get the default Dataproc Spark Connect session, creating it if none exists yet.
spark = (
    DataprocSparkSession.builder
    .getOrCreate()
)


█████████████████████████████████████████████████████████████▎                  







**Lendo tabela da bronze**

In [None]:
# Load the bronze results table from BigQuery through the Spark BigQuery connector.
df = (
    spark.read
    .format("bigquery")
    .option("table", "clinic-de.1_bronze.results")
    .load()
)

# Print the column names and types of the loaded DataFrame.
df.printSchema()


root
 |-- notes: string (nullable = true)
 |-- method: string (nullable = true)
 |-- test_code: string (nullable = true)
 |-- value: double (nullable = true)
 |-- ref_range: struct (nullable = true)
 |    |-- high: double (nullable = true)
 |    |-- low: double (nullable = true)
 |-- patient_id: string (nullable = true)
 |-- unit: string (nullable = true)
 |-- order_item_id: string (nullable = true)
 |-- validated_by: string (nullable = true)
 |-- flag: string (nullable = true)
 |-- released_at: timestamp (nullable = true)
 |-- order_id: string (nullable = true)
 |-- test_name: string (nullable = true)
 |-- result_id: string (nullable = true)



**Selecionando colunas necessárias do dataframe.**

In [None]:
# Project the columns to keep, in the same order as the source schema.
# "ref_range.*" expands the struct's fields (high, low) into top-level columns.
df_tratado_v1 = df.select(
    "notes", "method", "test_code", "value",
    "ref_range.*",
    "patient_id", "unit", "order_item_id", "validated_by", "flag",
    "released_at", "order_id", "test_name", "result_id",
)

**Verificando se há nulos nas colunas.**

In [None]:
# Count NULLs per column: each isNull() flag is cast to an integer (1/0) and summed,
# giving a single row with one count per column of df_tratado_v1.
(
    df_tratado_v1
    .select(
        *[
            F.sum(F.col(column_name).isNull().cast("integer")).alias(column_name)
            for column_name in df_tratado_v1.columns
        ]
    )
    .show()
)

+-----+------+---------+-----+----+---+----------+----+-------------+------------+----+-----------+--------+---------+---------+
|notes|method|test_code|value|high|low|patient_id|unit|order_item_id|validated_by|flag|released_at|order_id|test_name|result_id|
+-----+------+---------+-----+----+---+----------+----+-------------+------------+----+-----------+--------+---------+---------+
| 3072|     0|        0|    0|   0|  0|         0|   0|            0|           0|   0|          0|       0|        0|        0|
+-----+------+---------+-----+----+---+----------+----+-------------+------------+----+-----------+--------+---------+---------+



**Salvando dataframe na silver.**

In [None]:
# Write the selected columns to the silver results table in BigQuery,
# using the connector's "direct" write method and the "overwrite" save mode.
(
    df_tratado_v1.write
    .format("bigquery")
    .options(
        table="clinic-de.2_silver.results",
        writeMethod="direct",
    )
    .mode("overwrite")
    .save()
)

In [None]:
# Stop the Spark session created at the top of the notebook.
spark.stop()