In [22]:
#iniciar spark
from pyspark import SparkConf
from pyspark.sql import SparkSession
import json
import boto3

conf = SparkConf()
conf.set('spark.jars.packages', 'org.apache.hadoop:hadoop-aws:3.2.0')
conf.set('spark.hadoop.fs.s3a.aws.credentials.provider', 'com.amazonaws.auth.InstanceProfileCredentialsProvider')
spark = SparkSession.builder.config(conf=conf).getOrCreate()

bucket_refined = 'refined-dev-underdogs'
bucket_trusted = 'trusted-dev-underdogs'


file_name = 'iotcore/frequenciacardiaca/payload.json'
caminho_arquivo_s3 = f's3a://raw-dev-underdogs/{file_name}'

In [23]:
df = spark.read.json(caminho_arquivo_s3)    

df.show()


new_df_refined = df.select(df.nomePaciente.alias('nome_paciente'),
                           df.idadePaciente.alias('idadePaciente'),
                           df.generoPaciente.alias('generoPaciente'),
                           df.frequenciaCardiaca.alias('frequenciaCardiaca'),
                           df.dataLeitura.alias('dataLeitura'),
                           df.espacoUtilizado.alias('espacoUtilizado'),
                           df.tempoUtilizado.alias('tempoUtilizado'),
                           df.zonaDisponibilidade.alias('zonaDisponibilidade'),
                           df.bateriaDisponivel.alias('bateriaDisponivel'))


new_df_refined.show()

dados_refined = new_df_refined.collect()


json_refined = json.dumps(dados_refined)
print(json_refined)

session = boto3.Session()

s3 = session.client('s3')



s3.put_object(Body=json_refined,Bucket=bucket_refined,Key=file_name)

+-----------------+-------------------+------------------+------------------+--------------+-------------+------------+--------------------+-------------------+
|bateriaDisponivel|        dataLeitura|   espacoUtilizado|frequenciaCardiaca|generoPaciente|idadePaciente|nomePaciente|      tempoUtilizado|zonaDisponibilidade|
+-----------------+-------------------+------------------+------------------+--------------+-------------+------------+--------------------+-------------------+
|            99.85|2023-08-06 20:41:58|1.9073486328125E-4|               108|             M|           20|       Italo|3.862380981445312...|          localhost|
+-----------------+-------------------+------------------+------------------+--------------+-------------+------------+--------------------+-------------------+

+-------------+-------------+--------------+------------------+-------------------+------------------+--------------------+-------------------+-----------------+
|nome_paciente|idadePaciente|gen

{'ResponseMetadata': {'RequestId': '9BRJZ33PNYCNFM5Y',
  'HostId': 'yOUCYSA2UEgQtYj9tGAHcX57zAludw176wVpikOwwn8vyzOZaHosvG4lvkTaGRbsym3AzC9iuxg=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'yOUCYSA2UEgQtYj9tGAHcX57zAludw176wVpikOwwn8vyzOZaHosvG4lvkTaGRbsym3AzC9iuxg=',
   'x-amz-request-id': '9BRJZ33PNYCNFM5Y',
   'date': 'Sun, 06 Aug 2023 22:00:52 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"b88a175287c1acc2054e5562a7e137f3"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"b88a175287c1acc2054e5562a7e137f3"',
 'ServerSideEncryption': 'AES256'}

In [24]:
new_df_trusted = df.select(df.nomePaciente.alias('nome_paciente'),
                           df.idadePaciente.alias('idadePaciente'),
                           df.generoPaciente.alias('generoPaciente'),
                           df.frequenciaCardiaca.alias('frequenciaCardiaca'),
                           df.dataLeitura.alias('dataLeitura'))


new_df_trusted.show()

dados_trusted = new_df_trusted.collect()


json_trusted = json.dumps(dados_trusted)
print(json_trusted)

s3.put_object(Body=json_trusted,Bucket=bucket_trusted,Key=file_name)

+-------------+-------------+--------------+------------------+-------------------+
|nome_paciente|idadePaciente|generoPaciente|frequenciaCardiaca|        dataLeitura|
+-------------+-------------+--------------+------------------+-------------------+
|        Italo|           20|             M|               108|2023-08-06 20:41:58|
+-------------+-------------+--------------+------------------+-------------------+

[["Italo", 20, "M", 108, "2023-08-06 20:41:58"]]


{'ResponseMetadata': {'RequestId': '0DKE54S9AABHVC80',
  'HostId': 'IKsFvdY9/CBplEGJpVALM0KoS6XaMWXbaAa2gCjI+z7IZMiePHrOMwWPzvHs7aAWRBxvIxVKjyndO6a0SQLwOg==',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'IKsFvdY9/CBplEGJpVALM0KoS6XaMWXbaAa2gCjI+z7IZMiePHrOMwWPzvHs7aAWRBxvIxVKjyndO6a0SQLwOg==',
   'x-amz-request-id': '0DKE54S9AABHVC80',
   'date': 'Sun, 06 Aug 2023 22:00:59 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"77550b3fcfdbde1b947aad3288164060"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"77550b3fcfdbde1b947aad3288164060"',
 'ServerSideEncryption': 'AES256'}