In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DateType, DoubleType, TimestampType

spark = SparkSession.builder \
    .appName("DeltaLocal") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
    .config("spark.jars", "/home/ed/jars/delta-core_2.12-2.4.0.jar") \
    .getOrCreate()

print(spark.sparkContext._jsc.sc().listJars())

25/04/21 12:38:01 WARN Utils: Your hostname, edsatc resolves to a loopback address: 127.0.1.1; using 10.0.2.15 instead (on interface enp0s3)
25/04/21 12:38:01 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
25/04/21 12:38:02 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


Vector(spark://10.0.2.15:44111/jars/delta-core_2.12-2.4.0.jar)


In [None]:

apoliceSchema = StructType([
    StructField("cod_apolice", IntegerType(), False),
    StructField("cod_cliente", IntegerType(), False),
    StructField("data_inicio_vigencia", DateType(), True),
    StructField("data_fim_vigencia", DateType(), True),
    StructField("valor_cobertura", DoubleType(), True),
    StructField("valor_franquia", DoubleType(), True),
    StructField("placa", StringType(), True)
])

df_apolice = spark.read \
    .format("csv") \
    .schema(apoliceSchema) \
    .option("header", "true") \
    .option("encoding", "UTF-8") \
    .load("../data/apolice.csv")

In [3]:
carroSchema = StructType([
    StructField("placa", StringType(), False),
    StructField("modelo", StringType(), True),
    StructField("chassi", StringType(), False),
    StructField("marca", StringType(), True),
    StructField("ano", IntegerType(), True),
    StructField("cor", StringType(), True),
])

df_carro = spark.read \
    .format("csv") \
    .schema(carroSchema) \
    .option("header", "true") \
    .option("encoding", "UTF-8") \
    .load("../data/carro.csv")

In [4]:
clienteSchema = StructType([
    StructField("cod_cliente", IntegerType(), False),
    StructField("nome", StringType(), True),
    StructField("cpf", StringType(), False),
    StructField("sexo", StringType(), True),
    StructField("endereco", StringType(), True),
    StructField("telefone_fixo", StringType(), True),
    StructField("telefone_celular", StringType(), True),
])

df_cliente = spark.read \
    .format("csv") \
    .schema(clienteSchema) \
    .option("header", "true") \
    .option("encoding", "UTF-8") \
    .load("../data/cliente.csv")

In [5]:
sinistroSchema = StructType([
    StructField("cod_sinistro", IntegerType(), False),
    StructField("placa", StringType(), False),
    StructField("data_sinistro", DateType(), True),
    StructField("hora_sinistro", TimestampType(), True),
    StructField("local_sinistro", StringType(), True),
    StructField("condutor", StringType(), True),
])

df_sinistro = spark.read \
    .format("csv") \
    .schema(sinistroSchema) \
    .option("header", "true") \
    .option("encoding", "UTF-8") \
    .load("../data/sinistro.csv")

In [6]:
df_carro.show()

                                                                                

+-------+---------+-----------+----------+----+--------+
|  placa|   modelo|     chassi|     marca| ano|     cor|
+-------+---------+-----------+----------+----+--------+
|ALD3834|     CLIO|34574215969|   RENAULT|2011|  BRANCO|
|CCR8096|    CRETA|88547875547|   HYUNDAI|2020|  BRANCO|
|DLA3438|    PUNTO|98823483434|      FIAT|2013|   PRETO|
|EEE1056|ECO SPORT|56753453455|      FORD|2020|    AZUL|
|FFR1234|    PALIO|32383478747|      FIAT|2009| AMARELO|
|GQY6753|      S10|72004160549|        GM|2015|   PRETO|
|IAC8974|   TIGUAN|77130757746|VOLKSWAGEN|2022|    AZUL|
|JIE0952|   PASSAT|87493270405|VOLKSWAGEN|2016|   CINZA|
|JNU7898|     2020|87628347687|      FORD|2020|   VERDE|
|LVX7086|  SANDERO|00025131958|   RENAULT|1999|VERMELHO|
|LWJ9156|     ONIX|40991078801|        GM|2015|    AZUL|
|MZT1826|      GOL|41150439528|VOLKSWAGEN|1998| AMARELO|
|NAP5760|  COMPASS|40364369549|      JEEP|2017|   PRETO|
|NEM5116|     2008|69469771523|   PEUGEOT|2018|   PRETO|
|NFT2212|     KWID|12344343433|

In [8]:
df_cliente.show()

+-----------+--------------------+-----------+----+--------------------+--------------+----------------+
|cod_cliente|                nome|        cpf|sexo|            endereco| telefone_fixo|telefone_celular|
+-----------+--------------------+-----------+----+--------------------+--------------+----------------+
|          1|MARISA MELO OLIVEIRA|11111111111|   F|RUA JOSÉ WOSCH SO...|(41) 5096-4117|  (41) 5096-4117|
|          2|MURILO CARVALHO C...|22222222222|   M|RUA GEORGE BERNAN...|(21) 3944-5385|            NULL|
|          3|VINICIUS ROCHA RO...|33333333333|   M|                NULL|          NULL|            NULL|
|          4|CAROLINA ROCHA GOMES|44444444444|   F|                NULL|          NULL|            NULL|
|          5| ALINE SANTOS CASTRO|55555555555|   F|RUA ARMANDO PACAG...|(19) 7287-2893|  (19) 7287-2893|
|          6|LEILA CORREIA CAV...|66666666666|   F|RUA FRANCISCO D'A...|          NULL|            NULL|
|          7|SOPHIA CORREIA SA...|77777777777|   F|RUA 

In [7]:
df_sinistro.show()

+------------+-------+-------------+-------------------+--------------+--------+
|cod_sinistro|  placa|data_sinistro|      hora_sinistro|local_sinistro|condutor|
+------------+-------+-------------+-------------------+--------------+--------+
|   202255501|ALD3834|   2022-10-30|2025-04-21 11:00:00|      CRICIUMA|  ARAUJO|
|   202255502|GQY6753|   2022-11-20|2025-04-21 10:34:00|         ICARA| CARDOSO|
|   202255503|IAC8974|   2022-05-24|2025-04-21 22:40:00|       TUBARAO|   CUNHA|
|   202255504|LVX7086|   2023-01-25|2025-04-21 07:37:00|     URUSSANGA|   SILVA|
|   202255505|MZT1826|   2024-03-27|2025-04-21 15:40:00|   SIDEROPOLIS|    LUIZ|
|   202255506|MZT1826|   2023-10-20|2025-04-21 16:00:00| FLORIANOPOLIS|    JOAO|
|   202255507|MZT1826|   2022-05-08|2025-04-21 18:00:00|   NOVA VENEZA|   LUCAS|
|   202255508|IAC8974|   2022-08-08|2025-04-21 21:00:00|      CRICIUMA|   PEDRO|
+------------+-------+-------------+-------------------+--------------+--------+



In [None]:
df_cliente.write.format("parquet").mode("overwrite").save("/tmp/parquet/cliente")
df_cliente.write.format("delta").mode("overwrite").save("/tmp/delta/cliente")
df_carro.write.format("delta").mode("overwrite").save("/tmp/delta/carro")
df_apolice.write.format("delta").mode("overwrite").save("/tmp/delta/apolice")
df_sinistro.write.format("delta").mode("overwrite").save("/tmp/delta/sinistro")

spark.sql("CREATE TABLE delta_cliente USING DELTA LOCATION '/tmp/delta/cliente'")


3.12.3 (main, Feb  4 2025, 14:48:35) [GCC 13.3.0]


In [13]:
resultado = spark.sql("""
    SELECT * FROM cliente WHERE sexo = 'M'
""")
resultado.show()

+-----------+--------------------+-----------+----+--------------------+--------------+----------------+
|cod_cliente|                nome|        cpf|sexo|            endereco| telefone_fixo|telefone_celular|
+-----------+--------------------+-----------+----+--------------------+--------------+----------------+
|          2|MURILO CARVALHO C...|22222222222|   M|RUA GEORGE BERNAN...|(21) 3944-5385|            NULL|
|          3|VINICIUS ROCHA RO...|33333333333|   M|                NULL|          NULL|            NULL|
|          8|  JOÃO CORREIA COSTA|88888888888|   M|   RUA PARAGUAI, 526|(51) 7774-2809|  (51) 7774-2809|
|          9|   RAFAEL DIAS SOUZA|99999999999|   M|RUA MARIA MARCHI ...|(16) 7459-3139|  (16) 7459-3139|
+-----------+--------------------+-----------+----+--------------------+--------------+----------------+



In [14]:
resultado_join = spark.sql("""
    SELECT c.nome, a.placa, a.valor_cobertura
    FROM cliente c
    JOIN apolice a ON c.cod_cliente = a.cod_cliente
""")
resultado_join.show()

+--------------------+-------+---------------+
|                nome|  placa|valor_cobertura|
+--------------------+-------+---------------+
|MARISA MELO OLIVEIRA|MZT1826|        2565.25|
|MARISA MELO OLIVEIRA|NAP5760|        9425.25|
|MARISA MELO OLIVEIRA|CCR8096|       19970.84|
|MURILO CARVALHO C...|NFT2212|       19509.51|
|MURILO CARVALHO C...|ALD3834|        7865.55|
|VINICIUS ROCHA RO...|JIE0952|       19456.46|
|VINICIUS ROCHA RO...|LVX7086|         4615.6|
|VINICIUS ROCHA RO...|LWJ9156|       19130.12|
|VINICIUS ROCHA RO...|EEE1056|       15760.31|
|CAROLINA ROCHA GOMES|GQY6753|       15040.52|
|CAROLINA ROCHA GOMES|DLA3438|       16261.87|
|LEILA CORREIA CAV...|FFR1234|       17561.01|
|SOPHIA CORREIA SA...|NEM5116|        16081.9|
|SOPHIA CORREIA SA...|IAC8974|       12595.89|
|SOPHIA CORREIA SA...|IAC8974|        6815.28|
|SOPHIA CORREIA SA...|IAC8974|         2737.3|
+--------------------+-------+---------------+

