# Configurações Iniciais

In [1]:
import os

from pyspark.sql.types import *
import pyspark.sql.functions as fn
from pyspark.sql import SparkSession
from IPython.core.display import HTML
# Inject CSS so <pre> output is not wrapped; wide `show()` tables then stay
# aligned instead of folding onto several lines.
display(
    HTML("<style>pre { white-space: pre !important; }</style>")
)

# Build (or reuse) the notebook's SparkSession. It ships the AWS SDK /
# hadoop-aws / PostgreSQL JDBC jars and points the S3A connector at the
# MinIO endpoint.

# One entry per jar, joined with plain commas, so the `spark.jars` value holds
# no embedded newlines or indentation.
spark_jars = [
    "/home/jovyan/jars/aws-java-sdk-core-1.11.534.jar",
    "/home/jovyan/jars/aws-java-sdk-dynamodb-1.11.534.jar",
    "/home/jovyan/jars/aws-java-sdk-s3-1.11.534.jar",
    "/home/jovyan/jars/hadoop-aws-3.2.2.jar",
    "/home/jovyan/jars/postgresql-42.3.3.jar",
]

# Credentials come from the environment when available. The fallbacks are the
# values this notebook has always used, so it still runs unchanged when the
# variables are not set; override them anywhere the defaults do not apply.
s3_access_key = os.environ.get("AULAFIA_S3_ACCESS_KEY", "aulafia")
s3_secret_key = os.environ.get("AULAFIA_S3_SECRET_KEY", "aulafia@123")

spark = (
    SparkSession.builder
    .config("spark.jars", ",".join(spark_jars))
    .config("spark.hadoop.fs.s3a.endpoint", "http://minio:9000")
    .config("spark.hadoop.fs.s3a.access.key", s3_access_key)
    .config("spark.hadoop.fs.s3a.secret.key", s3_secret_key)
    # Passed as an explicit lowercase string rather than relying on how the
    # Python bool `True` gets stringified.
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .config(
        "spark.hadoop.fs.s3a.aws.credentials.provider",
        "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider",
    )
    .getOrCreate()
)

# Leitura do banco de dados (PostgreSQL)

In [2]:
# Schema used to parse the JSON text of the `formas` column: an array of
# structs, each with nullable string fields `name` and `url`.
schema = ArrayType(
    StructType()
    .add('name', StringType(), True)
    .add('url', StringType(), True)
)

# Load the `db_aulafia.pokemon` table over JDBC and parse the JSON text in
# `formas` into an array of structs using `schema`.

# Database credentials come from the environment when available. The fallbacks
# are the values this notebook has always used, so it still runs unchanged when
# the variables are not set.
db_user = os.environ.get("AULAFIA_DB_USER", "aulafia")
db_password = os.environ.get("AULAFIA_DB_PASSWORD", "aulafia@123")

df = (
    spark.read
    .format('jdbc')
    .option("driver", "org.postgresql.Driver")
    .option("url", "jdbc:postgresql://20.226.0.53:5432/db_aulafia")
    .option("dbtable", "db_aulafia.pokemon")
    .option("user", db_user)
    .option("password", db_password)
    .load()
    .withColumn('formas', fn.from_json('formas', schema))
)

# Preview the first 10 rows without truncating long cell values.
df.show(n=10, truncate=False)

+---+----------+-----------+------+----+---------------------------------------------------------+-----+
|id |nome      |experiencia|altura|peso|formas                                                   |type |
+---+----------+-----------+------+----+---------------------------------------------------------+-----+
|1  |bulbasaur |64         |7     |69  |[{bulbasaur, https://pokeapi.co/api/v2/pokemon-form/1/}] |grass|
|2  |ivysaur   |142        |10    |130 |[{ivysaur, https://pokeapi.co/api/v2/pokemon-form/2/}]   |grass|
|3  |venusaur  |263        |20    |1000|[{venusaur, https://pokeapi.co/api/v2/pokemon-form/3/}]  |grass|
|4  |charmander|62         |6     |85  |[{charmander, https://pokeapi.co/api/v2/pokemon-form/4/}]|fire |
|5  |charmeleon|142        |11    |190 |[{charmeleon, https://pokeapi.co/api/v2/pokemon-form/5/}]|fire |
|6  |charizard |267        |17    |905 |[{charizard, https://pokeapi.co/api/v2/pokemon-form/6/}] |fire |
|7  |squirtle  |63         |5     |90  |[{squirtle, htt

# Escrita no MinIO em Parquet

In [3]:
# Write the DataFrame as Parquet to the `exemplo` bucket through S3A,
# replacing whatever already exists at that path.
df.write.mode('overwrite').parquet('s3a://exemplo/exemplo_postgres_minio')

# Leitura do MinIO em Parquet

In [4]:
# Read the Parquet dataset back from the same S3A path and preview 10 rows
# without truncation.
(
    spark.read
    .parquet('s3a://exemplo/exemplo_postgres_minio')
    .show(n=10, truncate=False)
)

+---+----------+-----------+------+----+---------------------------------------------------------+-----+
|id |nome      |experiencia|altura|peso|formas                                                   |type |
+---+----------+-----------+------+----+---------------------------------------------------------+-----+
|1  |bulbasaur |64         |7     |69  |[{bulbasaur, https://pokeapi.co/api/v2/pokemon-form/1/}] |grass|
|2  |ivysaur   |142        |10    |130 |[{ivysaur, https://pokeapi.co/api/v2/pokemon-form/2/}]   |grass|
|3  |venusaur  |263        |20    |1000|[{venusaur, https://pokeapi.co/api/v2/pokemon-form/3/}]  |grass|
|4  |charmander|62         |6     |85  |[{charmander, https://pokeapi.co/api/v2/pokemon-form/4/}]|fire |
|5  |charmeleon|142        |11    |190 |[{charmeleon, https://pokeapi.co/api/v2/pokemon-form/5/}]|fire |
|6  |charizard |267        |17    |905 |[{charizard, https://pokeapi.co/api/v2/pokemon-form/6/}] |fire |
|7  |squirtle  |63         |5     |90  |[{squirtle, htt