In [8]:
import os
from pyspark.sql import SparkSession

# Informações de conexão com o PostgreSQL
DB_HOST = "airflow-postgres"
DB_PORT = "5432"
DB_USER = os.getenv('POSTGRES_USER')
DB_PASSWORD = os.getenv('POSTGRES_PASSWORD')
DB_NAME = "azurecost"

# Inicializa a SparkSession normalmente
spark = SparkSession.builder.getOrCreate()

jdbc_url = f"jdbc:postgresql://{DB_HOST}:{DB_PORT}/{DB_NAME}"

In [9]:
table_name = "resources"

df_resources = spark.read \
  .format("jdbc") \
  .option("url", jdbc_url) \
  .option("dbtable", table_name) \
  .option("user", DB_USER) \
  .option("password", DB_PASSWORD) \
  .option("driver", "org.postgresql.Driver") \
  .load()

In [10]:
df_resources.show(truncate=False)

+---+----------------------+------------------------------------+-------------------------------------+-----------------+--------------+--------+--------------+
|id |resourcename          |subscriptionid                      |resourcegroup                        |provider         |statusrecourse|currency|tendenciacusto|
+---+----------------------+------------------------------------+-------------------------------------+-----------------+--------------+--------+--------------+
|1  |appfunckabum          |da483b95-1caf-404c-bfe4-36abef87f6e6|nintendoproject                      |microsoft.web    |Ativo         |BRL     |Estável       |
|2  |appfuncmagalu         |da483b95-1caf-404c-bfe4-36abef87f6e6|nintendoproject                      |microsoft.web    |Ativo         |BRL     |Estável       |
|3  |dbstorage7ifgyhjijpdgi|da483b95-1caf-404c-bfe4-36abef87f6e6|nintendodatabrickswi86no-workspace-rg|microsoft.storage|Ativo         |BRL     |Estável       |
|4  |nintendoservplan      |da483b

In [11]:
table_name = "costresources"

df_cost = spark.read \
  .format("jdbc") \
  .option("url", jdbc_url) \
  .option("dbtable", table_name) \
  .option("user", DB_USER) \
  .option("password", DB_PASSWORD) \
  .option("driver", "org.postgresql.Driver") \
  .load()

In [12]:
df_cost.show(truncate=False)

+---+----------------+----------+----------------+-------------------+
|id |pretaxcost      |pct_change|previsaoproxima |usagedate          |
+---+----------------+----------+----------------+-------------------+
|1  |0.0             |0.0       |0.0             |2025-08-01 18:00:00|
|1  |0.0             |0.0       |0.0             |2025-08-01 17:55:00|
|1  |0.0             |0.0       |0.0             |2025-08-01 17:50:00|
|1  |0.0             |0.0       |0.0             |2025-08-01 17:45:00|
|4  |1.38148335      |0.0       |1.38148335      |2025-08-01 18:00:00|
|4  |1.38148335      |0.0       |1.38148335      |2025-08-01 17:55:00|
|4  |1.38148335      |0.0       |1.38148335      |2025-08-01 17:50:00|
|4  |1.38148335      |0.0       |1.38148335      |2025-08-01 17:45:00|
|3  |0.00340925033916|0.0       |0.00340925033916|2025-08-01 18:00:00|
|3  |0.00340925033916|0.0       |0.00340925033916|2025-08-01 17:55:00|
|3  |0.00340925033916|0.0       |0.00340925033916|2025-08-01 17:50:00|
|3  |0

In [13]:
df_cost.createOrReplaceTempView("tabela_cost")
df_resources.createOrReplaceTempView("tabela_resources")

In [14]:
creatview_resourcegroup_totals = spark.sql("""
WITH last_per_day AS (
  SELECT Id, PreTaxCost, UsageDate
  FROM (
      SELECT *,
             ROW_NUMBER() OVER (
                 PARTITION BY Id, DATE(UsageDate)
                 ORDER BY UsageDate DESC
             ) as rn
      FROM tabela_cost
  ) t
  WHERE rn = 1
)

SELECT r.ResourceGroup, SUM(l.PreTaxCost) as Total_PreTaxCost
FROM last_per_day l
LEFT JOIN tabela_resources r
  ON l.Id = r.Id
GROUP BY r.ResourceGroup
ORDER BY r.ResourceGroup
""")

creatview_resourcegroup_totals.show(truncate=False)

+-------------------------------------+------------------+
|ResourceGroup                        |Total_PreTaxCost  |
+-------------------------------------+------------------+
|nintendodatabrickswi86no-workspace-rg|0.00340925033916  |
|nintendoproject                      |1.4205268659960002|
+-------------------------------------+------------------+



In [22]:
creatview_resourcename_totals = spark.sql("""
WITH last_per_day AS (
  SELECT Id, PreTaxCost, UsageDate
  FROM (
      SELECT *,
             ROW_NUMBER() OVER (
                 PARTITION BY Id, DATE(UsageDate)
                 ORDER BY UsageDate DESC
             ) as rn
      FROM tabela_cost
  ) t
  WHERE rn = 1
)

SELECT r.ResourceName, SUM(l.PreTaxCost) as Total_PreTaxCost
FROM last_per_day l
LEFT JOIN tabela_resources r
  ON l.Id = r.Id
GROUP BY r.ResourceName
ORDER BY r.ResourceName
""")

creatview_resourcename_totals.show(truncate=False)

+----------------------+----------------+
|ResourceName          |Total_PreTaxCost|
+----------------------+----------------+
|appfunckabum          |0.0             |
|appfuncmagalu         |0.0             |
|dbstorage7ifgyhjijpdgi|0.00340925033916|
|nintendoservplan      |1.38148335      |
|nintendostorageaccount|0.039043515996  |
+----------------------+----------------+



In [21]:
creatview_cost_by_date = spark.sql("""
WITH max_usagedate_per_day AS (
  SELECT
    to_date(usagedate) AS day,
    MAX(usagedate) AS max_usagedate
  FROM
    tabela_cost
  GROUP BY
    to_date(usagedate)
),
filtered AS (
  SELECT
    usagedate,
    pretaxcost
  FROM
    tabela_cost t
  INNER JOIN
    max_usagedate_per_day m
  ON
    to_date(t.usagedate) = m.day AND t.usagedate = m.max_usagedate
)
SELECT
  to_date(usagedate) AS usagedate,
  SUM(pretaxcost) AS pretaxcost_sum
FROM
  filtered
GROUP BY
  to_date(usagedate)
ORDER BY
  usagedate
""")
creatview_cost_by_date.show(truncate=False)

+----------+------------------+
|usagedate |pretaxcost_sum    |
+----------+------------------+
|2025-08-01|1.4239361163351603|
+----------+------------------+



In [None]:
information = spark.sql("""
WITH max_usagedate_per_id AS (
  SELECT
    id,
    MAX(usagedate) AS max_usagedate
  FROM
    tabela_cost
  GROUP BY
    id
),
tabela_cost_filtered AS (
  SELECT t2.*
  FROM tabela_cost t2
  INNER JOIN max_usagedate_per_id m
    ON t2.id = m.id AND t2.usagedate = m.max_usagedate
)
SELECT
  t1.resourcename,
  t1.statusrecourse,
  t2f.pct_change,
  t1.tendenciacusto,
  t2f.previsaoproxima
FROM
  tabela_resources t1
LEFT JOIN
  tabela_cost_filtered t2f
ON
  t1.id = t2f.id
""")

information.show(truncate=False)


+----------------------+--------------+----------+--------------+----------------+
|resourcename          |statusrecourse|pct_change|tendenciacusto|previsaoproxima |
+----------------------+--------------+----------+--------------+----------------+
|appfunckabum          |Ativo         |0.0       |Estável       |0.0             |
|dbstorage7ifgyhjijpdgi|Ativo         |0.0       |Estável       |0.00340925033916|
|nintendostorageaccount|Ativo         |0.0       |Estável       |0.039043515996  |
|nintendoservplan      |Ativo         |0.0       |Estável       |1.38148335      |
|appfuncmagalu         |Ativo         |0.0       |Estável       |0.0             |
+----------------------+--------------+----------+--------------+----------------+

