In [16]:
#####################################################################################################
#
# Lê os arquivos da camada ouro, que foram gerados a partir das informações da camada prata com os
# dados da API Posicao da SPTRANS
# 


In [17]:
from pyspark.sql import SparkSession

In [18]:
from pyspark.sql.functions import explode

In [19]:
from pyspark.sql.functions import from_utc_timestamp,split, substring, sum, avg, max,filter, ceil

In [20]:
import os
import zoneinfo as zi
from datetime import datetime, timedelta

In [21]:
# Create (or reuse) the Spark session for this job, with Hive support enabled.
spark = (
    SparkSession.builder
    .appName("FIA-Proj-SPTRANS-Ouro")
    .enableHiveSupport()
    .getOrCreate()
)

In [22]:
#####################################################################################################
#
# Define a partição de hora (hora anterior à atual) e os paths da camada ouro que serão lidos
#

In [23]:
# Compute the hourly partition key (YYYY/MM/DD/HH, Sao Paulo local time) of
# the previous hour, which is the partition this notebook reads.
GMT = zi.ZoneInfo('GMT')
LOCAL_TZ_STR = 'America/Sao_Paulo'
LOCAL_TZ = zi.ZoneInfo(LOCAL_TZ_STR)

# Number of hours to look back from "now". The notebook targets the previous
# hour, so this is 1; a hard-coded 13 here contradicted both the comment and
# the *_lasthour names. Raise it deliberately only when backfilling.
HOURS_BACK = 1

dt_localtime = datetime.now(tz=LOCAL_TZ)
dt_lasthour = dt_localtime - timedelta(hours=HOURS_BACK)

# Formatted as a path fragment; it is appended to each gold-layer dataset path.
str_lasthour = dt_lasthour.strftime('%Y/%m/%d/%H')


In [24]:
# Gold-layer path where the hourly average number of buses in circulation
# per line was persisted.
ouro_avg_linha = f"s3a://ouro/MEDIA_ONIBUS_POR_LINHA/{str_lasthour}/"

In [25]:
# Gold-layer path where the hourly average of the total number of buses in
# circulation was persisted.
ouro_avg_geral = f"s3a://ouro/MEDIA_ONIBUS_GERAL/{str_lasthour}/"

In [26]:
# Gold-layer path where the per-line total of buses in circulation at each
# measurement was persisted.
ouro_total_linha = f"s3a://ouro/TOTAL_ONIBUS_POR_LINHA/{str_lasthour}/"

In [27]:
# Gold-layer path where the overall total of buses in circulation at each
# measurement was persisted.
ouro_total_geral = f"s3a://ouro/TOTAL_ONIBUS_GERAL/{str_lasthour}/"

In [28]:
# Show the four resolved gold-layer paths, one per line.
print(ouro_avg_linha, ouro_avg_geral, ouro_total_linha, ouro_total_geral, sep="\n")

s3a://ouro/MEDIA_ONIBUS_POR_LINHA/2024/09/17/17/
s3a://ouro/MEDIA_ONIBUS_GERAL/2024/09/17/17/
s3a://ouro/TOTAL_ONIBUS_POR_LINHA/2024/09/17/17/
s3a://ouro/TOTAL_ONIBUS_GERAL/2024/09/17/17/


In [29]:
#####################################################################################################
#
# Lê os arquivos parquet da camada ouro
#

In [30]:
# Load the per-line, per-measurement bus totals from the gold layer (parquet).
df_total_linha = spark.read.format("parquet").load(ouro_total_linha)

In [31]:
# Print the column names and types of the per-line totals DataFrame.
df_total_linha.printSchema()

root
 |-- data_ref: string (nullable = true)
 |-- hora_ref: string (nullable = true)
 |-- id_linha: long (nullable = true)
 |-- sentido_linha: long (nullable = true)
 |-- let_cod_linha: string (nullable = true)
 |-- let_destino: string (nullable = true)
 |-- let_origem: string (nullable = true)
 |-- qtde_onibus: long (nullable = true)



In [32]:
# Preview the first five rows of the per-line totals.
df_total_linha.show(n=5)

+--------+--------+--------+-------------+-------------+-----------------+-----------------+-----------+
|data_ref|hora_ref|id_linha|sentido_linha|let_cod_linha|      let_destino|       let_origem|qtde_onibus|
+--------+--------+--------+-------------+-------------+-----------------+-----------------+-----------+
|20240917|   17:50|   34909|            2|      1017-10|CONEXÃO VL. IÓRIO|            PERUS|         10|
|20240917|   17:50|   34668|            2|      807A-10| TERM. STO. AMARO|TERM. CAMPO LIMPO|          6|
|20240917|   17:50|   33846|            2|      3098-10| SHOP. ARICANDUVA|JD. SÃO FRANCISCO|          8|
|20240917|   17:50|   34380|            2|      3787-10|   METRÔ ITAQUERA|  CID. TIRADENTES|          6|
|20240917|   17:50|     706|            1|      278A-10|          PENHA  |    METRÔ SANTANA|          3|
+--------+--------+--------+-------------+-------------+-----------------+-----------------+-----------+
only showing top 5 rows



In [33]:
# Load the overall per-measurement bus totals from the gold layer (parquet).
df_total_geral = spark.read.format("parquet").load(ouro_total_geral)

In [34]:
# Print the column names and types of the overall totals DataFrame.
df_total_geral.printSchema()

root
 |-- data_ref: string (nullable = true)
 |-- hora_ref: string (nullable = true)
 |-- qtde_onibus: long (nullable = true)



In [35]:
# Preview up to five rows of the overall totals.
df_total_geral.show(n=5)

+--------+--------+-----------+
|data_ref|hora_ref|qtde_onibus|
+--------+--------+-----------+
|20240917|   17:45|      11398|
|20240917|   17:55|      11431|
|20240917|   17:50|      11411|
+--------+--------+-----------+



In [36]:
# Load the hourly per-line bus averages from the gold layer (parquet).
df_avg_hr_linha = spark.read.format("parquet").load(ouro_avg_linha)

In [37]:
# Print the column names and types of the hourly per-line averages DataFrame.
df_avg_hr_linha.printSchema()

root
 |-- data_ref: string (nullable = true)
 |-- hora_id_ref: string (nullable = true)
 |-- id_linha: long (nullable = true)
 |-- sentido_linha: long (nullable = true)
 |-- let_cod_linha: string (nullable = true)
 |-- let_destino: string (nullable = true)
 |-- let_origem: string (nullable = true)
 |-- qtde_onibus: long (nullable = true)



In [38]:
# Preview the first five rows of the hourly per-line averages.
df_avg_hr_linha.show(n=5)

+--------+-----------+--------+-------------+-------------+-----------------+-------------------+-----------+
|data_ref|hora_id_ref|id_linha|sentido_linha|let_cod_linha|      let_destino|         let_origem|qtde_onibus|
+--------+-----------+--------+-------------+-------------+-----------------+-------------------+-----------+
|20240917|         17|   34722|            2|      7011-31|       JD. ÂNGELA|       JD. DOS REIS|          4|
|20240917|         17|     483|            1|      6232-10|METRÔ BARRA FUNDA|PINHEIROS/VILA IDA |          2|
|20240917|         17|    1055|            1|      3766-10|   METRÔ ITAQUERA|           COHAB II|          7|
|20240917|         17|     252|            1|      263C-10|         COHAB II|         JD. HELENA|          4|
|20240917|         17|   34036|            2|      9012-10|       TERM. LAPA|          ITABERABA|          2|
+--------+-----------+--------+-------------+-------------+-----------------+-------------------+-----------+
only showi

In [39]:
# Load the hourly overall bus average from the gold layer (parquet).
df_avg_geral = spark.read.format("parquet").load(ouro_avg_geral)

In [40]:
# Print the column names and types of the hourly overall average DataFrame.
df_avg_geral.printSchema()

root
 |-- data_ref: string (nullable = true)
 |-- hora_id_ref: string (nullable = true)
 |-- qtde_onibus: long (nullable = true)



In [41]:
# Preview up to five rows of the hourly overall average.
df_avg_geral.show(n=5)

+--------+-----------+-----------+
|data_ref|hora_id_ref|qtde_onibus|
+--------+-----------+-----------+
|20240917|         17|      11414|
+--------+-----------+-----------+



In [42]:
#####################################################################################################
#
# Configura a conexão JDBC com o Postgres e grava a média geral de ônibus por hora

In [43]:
# JDBC connection settings for the target Postgres database.
# The URL and credentials are read from environment variables so that real
# secrets do not have to be committed with the notebook. The fallbacks keep
# the previous hard-coded values, so existing local/dev runs are unaffected.
url = os.environ.get("SPTRANS_JDBC_URL", "jdbc:postgresql://db:5432/dvdrental")
properties = {
    "user": os.environ.get("SPTRANS_DB_USER", "admin"),
    "password": os.environ.get("SPTRANS_DB_PASSWORD", "admin"),
    "driver": "org.postgresql.Driver",
}

In [44]:
# Append the hourly overall average to the Postgres table
# sptrans.media_onibus_geral over JDBC.
(
    df_avg_geral.write
    .mode("append")
    .jdbc(url=url, table='sptrans.media_onibus_geral', properties=properties)
)

In [45]:
#####################################################################################################
#
# Fim do processamento